diff --git a/src/backend/src/controller/directory_manager.py b/src/backend/src/controller/directory_manager.py index 1e42dbcc..7130d94c 100644 --- a/src/backend/src/controller/directory_manager.py +++ b/src/backend/src/controller/directory_manager.py @@ -2,17 +2,24 @@ Reads provider configuration from ``app_settings``, dispatches to the right concrete ``DirectoryProvider``, and caches search results in -memory for 5 minutes per (provider_type, query_shape) key. The cache -is per-instance and the manager is held as a singleton on +memory for 5 minutes per (provider_type, config_signature, query) key. +The cache is per-instance and the manager is held as a singleton on ``app.state``. The manager itself is provider-agnostic: adding a new provider is a -matter of registering another class in ``_PROVIDER_REGISTRY``. +matter of: + +1. Implementing ``DirectoryProvider`` in ``src.controller.directory_providers``. +2. Registering it in ``_PROVIDER_REGISTRY`` below. +3. (If the provider needs new settings) adding the keys to + ``DirectoryProviderConfig`` and to the read/write paths here. + +No changes to routes or models otherwise. """ import time from threading import Lock -from typing import Any, Callable, Dict, List, Optional, Tuple +from typing import Callable, Dict, List, Optional, Tuple from sqlalchemy.orm import Session @@ -20,13 +27,19 @@ from src.controller.directory_providers import ( DirectoryError, DirectoryProvider, + DirectoryProviderConfig, + DirectoryProviderContext, EntraIdProvider, + FileProvider, + LakebaseProvider, ) from src.models.directory import ( DirectoryProviderType, DirectoryStatus, Principal, SETTING_KEY_CONNECTION_NAME, + SETTING_KEY_FILE_PATH, + SETTING_KEY_LAKEBASE_TABLE, SETTING_KEY_PROVIDER_TYPE, ) from src.repositories.app_settings_repository import app_settings_repo @@ -38,11 +51,27 @@ _DEFAULT_SEARCH_LIMIT = 20 -# Provider registry. 
Adding a new provider requires only an entry here -# plus an implementation in src.controller.directory_providers; the -# manager, routes, and models stay untouched. -_PROVIDER_REGISTRY: Dict[str, Callable[[Any, str], DirectoryProvider]] = { +# Provider registry. Each factory takes (context, config) and returns +# a DirectoryProvider. Adding a new provider requires only an entry +# here plus an implementation in src.controller.directory_providers; +# routes and models stay untouched. +ProviderFactory = Callable[ + [DirectoryProviderContext, DirectoryProviderConfig], DirectoryProvider +] +_PROVIDER_REGISTRY: Dict[str, ProviderFactory] = { DirectoryProviderType.ENTRA.value: EntraIdProvider, + DirectoryProviderType.LAKEBASE.value: LakebaseProvider, + DirectoryProviderType.FILE.value: FileProvider, +} + + +# Each provider declares which settings keys are required so the +# manager can decide ``configured=True/False`` without instantiating +# the provider. +_REQUIRED_KEYS: Dict[str, Tuple[str, ...]] = { + DirectoryProviderType.ENTRA.value: (SETTING_KEY_CONNECTION_NAME,), + DirectoryProviderType.LAKEBASE.value: (SETTING_KEY_LAKEBASE_TABLE,), + DirectoryProviderType.FILE.value: (SETTING_KEY_FILE_PATH,), } @@ -54,30 +83,34 @@ class DirectoryManager: """ def __init__(self) -> None: - self._cache: Dict[Tuple[str, str, str, str], Tuple[float, List[Principal]]] = {} + self._cache: Dict[ + Tuple[str, Tuple[Optional[str], ...], str, str], + Tuple[float, List[Principal]], + ] = {} self._lock = Lock() - # Track which (provider_type, connection_name) tuple the cache - # was filled for; flip => purge. - self._cache_keyed_on: Optional[Tuple[str, str]] = None + # Track which (provider_type, config_signature) tuple the + # cache was filled for; flip => purge. 
+ self._cache_keyed_on: Optional[Tuple[str, Tuple[Optional[str], ...]]] = None # ----- public API --------------------------------------------------------- def get_status(self, db: Session) -> DirectoryStatus: """Return the live ``configured`` flag plus a redaction-safe summary.""" - provider_type = app_settings_repo.get_by_key(db, SETTING_KEY_PROVIDER_TYPE) - connection_name = app_settings_repo.get_by_key(db, SETTING_KEY_CONNECTION_NAME) - configured = bool(provider_type) and bool(connection_name) and provider_type in _PROVIDER_REGISTRY + provider_type, config = self._read_settings(db) + configured = self._is_configured(provider_type, config) return DirectoryStatus( configured=configured, - provider_type=provider_type if provider_type else None, - connection_name=connection_name if connection_name else None, + provider_type=provider_type or None, + connection_name=config.connection_name, + lakebase_table=config.lakebase_table, + file_path=config.file_path, ) def search( self, db: Session, - ws_client: Any, + ctx: DirectoryProviderContext, query: str, types: List[str], limit: int = _DEFAULT_SEARCH_LIMIT, @@ -90,29 +123,35 @@ def search( directory is not configured. 
""" - provider_type, connection_name = self._read_settings(db) - if not provider_type or not connection_name: + provider_type, config = self._read_settings(db) + if not self._is_configured(provider_type, config): return [] - self._invalidate_if_keyed_changed(provider_type, connection_name) + assert provider_type is not None # narrowed by _is_configured + signature = config.signature() + self._invalidate_if_keyed_changed(provider_type, signature) wanted = {t for t in types if t in {"user", "group"}} or {"user", "group"} results: List[Principal] = [] seen: set = set() - provider = self._build_provider(provider_type, ws_client, connection_name) + provider = self._build_provider(provider_type, ctx, config) if "user" in wanted: - for p in self._cached(provider_type, connection_name, "user", query, limit, - lambda: provider.search_users(query, limit)): + for p in self._cached( + provider_type, signature, "user", query, limit, + lambda: provider.search_users(query, limit), + ): key = (p.type, p.id) if key not in seen: seen.add(key) results.append(p) if "group" in wanted: - for p in self._cached(provider_type, connection_name, "group", query, limit, - lambda: provider.search_groups(query, limit)): + for p in self._cached( + provider_type, signature, "group", query, limit, + lambda: provider.search_groups(query, limit), + ): key = (p.type, p.id) if key not in seen: seen.add(key) @@ -121,15 +160,19 @@ def search( # Honour the caller's overall limit even after cross-type merge. return results[:limit] - def test(self, db: Session, ws_client: Any) -> None: + def test(self, db: Session, ctx: DirectoryProviderContext) -> None: """Probe the configured provider. 
Raises ``DirectoryError`` if unhealthy.""" - provider_type, connection_name = self._read_settings(db) + provider_type, config = self._read_settings(db) if not provider_type: raise DirectoryError("Directory provider is not configured") - if not connection_name: - raise DirectoryError("UC HTTP connection name is not configured") - provider = self._build_provider(provider_type, ws_client, connection_name) + if not self._is_configured(provider_type, config): + missing = _REQUIRED_KEYS.get(provider_type, ()) + raise DirectoryError( + f"Provider {provider_type!r} is missing required setting(s): " + f"{', '.join(missing)}" + ) + provider = self._build_provider(provider_type, ctx, config) provider.test() def invalidate_cache(self) -> None: @@ -141,27 +184,64 @@ def invalidate_cache(self) -> None: # ----- internals ---------------------------------------------------------- - def _read_settings(self, db: Session) -> Tuple[Optional[str], Optional[str]]: + def _read_settings( + self, db: Session, + ) -> Tuple[Optional[str], DirectoryProviderConfig]: provider_type = app_settings_repo.get_by_key(db, SETTING_KEY_PROVIDER_TYPE) - connection_name = app_settings_repo.get_by_key(db, SETTING_KEY_CONNECTION_NAME) - return (provider_type or None), (connection_name or None) + config = DirectoryProviderConfig( + connection_name=app_settings_repo.get_by_key(db, SETTING_KEY_CONNECTION_NAME) or None, + lakebase_table=app_settings_repo.get_by_key(db, SETTING_KEY_LAKEBASE_TABLE) or None, + file_path=app_settings_repo.get_by_key(db, SETTING_KEY_FILE_PATH) or None, + ) + return (provider_type or None), config + + def _is_configured( + self, provider_type: Optional[str], config: DirectoryProviderConfig, + ) -> bool: + if not provider_type or provider_type not in _PROVIDER_REGISTRY: + return False + required = _REQUIRED_KEYS.get(provider_type, ()) + # Translate setting keys to config-field names. 
+ key_to_field = { + SETTING_KEY_CONNECTION_NAME: "connection_name", + SETTING_KEY_LAKEBASE_TABLE: "lakebase_table", + SETTING_KEY_FILE_PATH: "file_path", + } + for key in required: + field = key_to_field.get(key) + if field is None: + # Defensive: an unknown required key means the + # registry is misconfigured at code level. Treat as + # not-configured rather than crash. + logger.warning( + "Provider %s declares unknown required setting key %s", + provider_type, key, + ) + return False + if not getattr(config, field): + return False + return True def _build_provider( self, provider_type: str, - ws_client: Any, - connection_name: str, + ctx: DirectoryProviderContext, + config: DirectoryProviderConfig, ) -> DirectoryProvider: factory = _PROVIDER_REGISTRY.get(provider_type) if factory is None: raise DirectoryError( f"Unknown directory provider type: {provider_type!r}" ) - return factory(ws_client, connection_name) + return factory(ctx, config) - def _invalidate_if_keyed_changed(self, provider_type: str, connection_name: str) -> None: + def _invalidate_if_keyed_changed( + self, + provider_type: str, + signature: Tuple[Optional[str], ...], + ) -> None: with self._lock: - current = (provider_type, connection_name) + current = (provider_type, signature) if self._cache_keyed_on is not None and self._cache_keyed_on != current: self._cache.clear() self._cache_keyed_on = current @@ -169,7 +249,7 @@ def _invalidate_if_keyed_changed(self, provider_type: str, connection_name: str) def _cached( self, provider_type: str, - connection_name: str, + signature: Tuple[Optional[str], ...], kind: str, query: str, limit: int, @@ -177,7 +257,7 @@ def _cached( ) -> List[Principal]: # Normalise the query so capitalisation / surrounding whitespace # doesn't bypass the cache. 
- cache_key = (provider_type, connection_name, kind, f"{query.strip().lower()}|{limit}") + cache_key = (provider_type, signature, kind, f"{query.strip().lower()}|{limit}") now = time.monotonic() with self._lock: entry = self._cache.get(cache_key) @@ -202,18 +282,25 @@ def _cached( def register_provider( provider_type: str, - factory: Callable[[Any, str], DirectoryProvider], + factory: ProviderFactory, + *, + required_keys: Tuple[str, ...] = (), ) -> None: """Register an additional provider implementation at runtime. Used by tests to inject stub providers without touching the - production registry. + production registry. ``required_keys`` mirrors the production + convention so the manager can compute ``configured`` correctly + for the stub too; default empty means "no required settings". """ _PROVIDER_REGISTRY[provider_type] = factory + if required_keys: + _REQUIRED_KEYS[provider_type] = required_keys def unregister_provider(provider_type: str) -> None: """Inverse of :func:`register_provider`, primarily for test teardown.""" _PROVIDER_REGISTRY.pop(provider_type, None) + _REQUIRED_KEYS.pop(provider_type, None) diff --git a/src/backend/src/controller/directory_providers/__init__.py b/src/backend/src/controller/directory_providers/__init__.py index 9afa7266..cb84b879 100644 --- a/src/backend/src/controller/directory_providers/__init__.py +++ b/src/backend/src/controller/directory_providers/__init__.py @@ -1,25 +1,41 @@ """Directory provider plug-ins. -Each concrete provider talks to its IdP exclusively via a Unity Catalog -HTTP Connection so UC owns OAuth2 client-credentials acquisition, -caching, and refresh. The app stores no client secret and no token -cache. +Concrete providers shipped with the app: -Field mapping (Graph ``userPrincipalName`` vs Okta ``profile.login`` vs -...) lives entirely inside each provider; the manager and routes only -ever see normalised ``Principal`` instances. 
+- ``EntraIdProvider`` — Microsoft Entra ID via Microsoft Graph (UC HTTP) +- ``LakebaseProvider`` — Postgres table backed (the app's own Lakebase DB) +- ``FileProvider`` — Local CSV file (primarily for tests / demos) + +Each provider receives a typed ``DirectoryProviderContext`` (transport +handles: SDK workspace client, SQLAlchemy engine, ...) and a +``DirectoryProviderConfig`` (all directory settings in one bag). The +provider reads only the fields relevant to its type and raises +``DirectoryError`` on missing / invalid required values. + +Field mapping lives entirely inside each provider; the manager and +routes only ever see normalised ``Principal`` instances. """ from src.controller.directory_providers.base import ( DirectoryError, DirectoryProvider, + DirectoryProviderConfig, + DirectoryProviderContext, ) from src.controller.directory_providers.entra_id_provider import ( EntraIdProvider, ) +from src.controller.directory_providers.file_provider import FileProvider +from src.controller.directory_providers.lakebase_provider import ( + LakebaseProvider, +) __all__ = [ "DirectoryError", "DirectoryProvider", + "DirectoryProviderConfig", + "DirectoryProviderContext", "EntraIdProvider", + "FileProvider", + "LakebaseProvider", ] diff --git a/src/backend/src/controller/directory_providers/base.py b/src/backend/src/controller/directory_providers/base.py index 77a4be39..90890f64 100644 --- a/src/backend/src/controller/directory_providers/base.py +++ b/src/backend/src/controller/directory_providers/base.py @@ -1,7 +1,16 @@ -"""Abstract DirectoryProvider interface implemented by every concrete provider.""" +"""Abstract DirectoryProvider interface implemented by every concrete provider. + +Providers receive a small ``DirectoryProviderContext`` so they can pull +in whatever transport they need (Databricks SDK workspace client for +Entra; SQLAlchemy engine for Lakebase; filesystem for File). 
They also +receive a typed ``DirectoryProviderConfig`` carrying every directory +setting -- each provider reads only the fields relevant to its type +and raises ``DirectoryError`` on missing required values. +""" from abc import ABC, abstractmethod -from typing import List +from dataclasses import dataclass +from typing import Any, List, Optional, Tuple from src.models.directory import Principal @@ -15,13 +24,46 @@ class DirectoryError(Exception): """ +@dataclass +class DirectoryProviderContext: + """Per-instance transport handles a provider may need. + + Built per request by the route layer and passed through + ``DirectoryManager`` to the provider. Adding a context field only + requires updating that builder -- providers ignore unused fields. + """ + + ws_client: Any = None + db_engine: Any = None + + +@dataclass +class DirectoryProviderConfig: + """All directory settings in one bag. + + Each provider reads only the fields relevant to its type. Unused + fields are simply ignored. This keeps the registry signature + stable as more providers come online. + """ + + connection_name: Optional[str] = None # entra + lakebase_table: Optional[str] = None # lakebase + file_path: Optional[str] = None # file + + def signature(self) -> Tuple[Optional[str], ...]: + """A hashable representation used for cache invalidation.""" + + return (self.connection_name, self.lakebase_table, self.file_path) + + class DirectoryProvider(ABC): """Provider plug-in contract. Every method must return normalised ``Principal`` instances and is responsible for safe escaping of the caller-supplied ``prefix`` / - ``id`` against its own query syntax (OData for Graph, SCIM for - Okta, etc.). The manager does not sanitise these strings. + ``id`` against its own query syntax (OData for Graph, parameterised + SQL for Lakebase, etc.). The manager does not sanitise these + strings. 
""" @abstractmethod diff --git a/src/backend/src/controller/directory_providers/entra_id_provider.py b/src/backend/src/controller/directory_providers/entra_id_provider.py index a8ce30b9..bf2104a6 100644 --- a/src/backend/src/controller/directory_providers/entra_id_provider.py +++ b/src/backend/src/controller/directory_providers/entra_id_provider.py @@ -22,6 +22,8 @@ from src.controller.directory_providers.base import ( DirectoryError, DirectoryProvider, + DirectoryProviderConfig, + DirectoryProviderContext, ) from src.models.directory import Principal, PrincipalType @@ -57,11 +59,17 @@ class EntraIdProvider(DirectoryProvider): lives in ``DirectoryManager``. """ - def __init__(self, ws_client: Any, connection_name: str) -> None: - if not connection_name: + def __init__( + self, + ctx: DirectoryProviderContext, + config: DirectoryProviderConfig, + ) -> None: + if not config.connection_name: raise DirectoryError("UC HTTP connection name is required") - self._ws = ws_client - self._connection_name = connection_name + if ctx.ws_client is None: + raise DirectoryError("Workspace client is required for Entra provider") + self._ws = ctx.ws_client + self._connection_name = config.connection_name # ----- DirectoryProvider -------------------------------------------------- diff --git a/src/backend/src/controller/directory_providers/file_provider.py b/src/backend/src/controller/directory_providers/file_provider.py new file mode 100644 index 00000000..a5ddb37b --- /dev/null +++ b/src/backend/src/controller/directory_providers/file_provider.py @@ -0,0 +1,177 @@ +"""CSV-file-backed directory provider. + +Primarily intended for tests, demos, and offline development. The +file path is taken from the ``DIRECTORY_FILE_PATH`` setting. Format: + + type,id,display_name,sub_label + user,alice@example.com,Alice Liddell,alice@example.com + group,Producers,Data Producers,producers-guid + +The file is re-read from disk whenever its ``mtime`` advances. 
We do +not watch the filesystem -- the manager's per-request settings read +plus the existing 5-min cache TTL make polling unnecessary. +""" + +from __future__ import annotations + +import csv +import os +from threading import Lock +from typing import List, Optional + +from src.common.logging import get_logger +from src.controller.directory_providers.base import ( + DirectoryError, + DirectoryProvider, + DirectoryProviderConfig, + DirectoryProviderContext, +) +from src.models.directory import Principal, PrincipalType + +logger = get_logger(__name__) + + +# Expected CSV columns. Extra columns are ignored. +_REQUIRED_COLUMNS = {"type", "id", "display_name"} + + +class FileProvider(DirectoryProvider): + """Provider that reads principals from a CSV file on disk.""" + + # Class-level cache keyed on file path so multiple provider + # instances against the same file share the parsed contents. + _cache: dict = {} + _cache_lock = Lock() + + def __init__( + self, + ctx: DirectoryProviderContext, # noqa: ARG002 - ctx is part of the contract + config: DirectoryProviderConfig, + ) -> None: + if not config.file_path: + raise DirectoryError("File path is required for File provider") + self._path = config.file_path + + # ----- DirectoryProvider -------------------------------------------------- + + def search_users(self, prefix: str, top: int) -> List[Principal]: + return self._search("user", prefix, top) + + def search_groups(self, prefix: str, top: int) -> List[Principal]: + return self._search("group", prefix, top) + + def get_user(self, id: str) -> Principal: + return self._get("user", id) + + def get_group(self, id: str) -> Principal: + return self._get("group", id) + + def test(self) -> None: + # Force a re-read so misconfigured paths / malformed CSVs + # surface as DirectoryError immediately rather than at the + # next search call. 
+ self._load(force_reload=True) + + # ----- internals ---------------------------------------------------------- + + def _search(self, kind: str, prefix: str, top: int) -> List[Principal]: + if not prefix: + return [] + rows = self._load() + needle = prefix.lower() + out: List[Principal] = [] + for r in rows: + if r.type.value != kind: + continue + if r.display_name.lower().startswith(needle) or r.id.lower().startswith(needle): + out.append(r) + if len(out) >= top: + break + return out + + def _get(self, kind: str, id: str) -> Principal: + if not id: + raise DirectoryError(f"Empty {kind} id") + for r in self._load(): + if r.type.value == kind and r.id == id: + return r + raise DirectoryError(f"{kind.capitalize()} {id!r} not found in CSV") + + def _load(self, force_reload: bool = False) -> List[Principal]: + if not os.path.isfile(self._path): + raise DirectoryError(f"CSV file not found: {self._path}") + try: + mtime = os.path.getmtime(self._path) + except OSError as exc: + raise DirectoryError(f"Cannot stat CSV file {self._path}: {exc}") from exc + + with FileProvider._cache_lock: + entry = FileProvider._cache.get(self._path) + if entry and not force_reload and entry[0] == mtime: + return entry[1] + + rows = _read_csv(self._path) + with FileProvider._cache_lock: + FileProvider._cache[self._path] = (mtime, rows) + return rows + + +def _read_csv(path: str) -> List[Principal]: + """Parse the CSV. 
Raises ``DirectoryError`` on malformed input.""" + + try: + with open(path, "r", encoding="utf-8", newline="") as fh: + reader = csv.DictReader(fh) + if not reader.fieldnames: + raise DirectoryError(f"CSV {path} has no header row") + missing = _REQUIRED_COLUMNS - {c.strip() for c in reader.fieldnames} + if missing: + raise DirectoryError( + f"CSV {path} is missing required columns: {sorted(missing)}" + ) + out: List[Principal] = [] + for idx, raw in enumerate(reader, start=2): # header is row 1 + principal = _row_to_principal(raw, path=path, lineno=idx) + if principal is not None: + out.append(principal) + return out + except DirectoryError: + raise + except Exception as exc: + raise DirectoryError(f"Failed to read CSV {path}: {exc}") from exc + + +def _row_to_principal( + row: dict, *, path: str, lineno: int, +) -> Optional[Principal]: + """Best-effort row -> Principal mapping. Skips obviously empty rows.""" + + raw_type = (row.get("type") or "").strip().lower() + raw_id = (row.get("id") or "").strip() + display_name = (row.get("display_name") or "").strip() + sub_label = (row.get("sub_label") or "").strip() or None + + if not raw_id and not display_name and not raw_type: + return None # blank row -- skip silently + if raw_type not in {"user", "group"}: + raise DirectoryError( + f"{path} line {lineno}: type must be 'user' or 'group', got {raw_type!r}" + ) + if not raw_id: + raise DirectoryError(f"{path} line {lineno}: id is required") + if not display_name: + display_name = raw_id + + return Principal( + type=PrincipalType.USER if raw_type == "user" else PrincipalType.GROUP, + id=raw_id, + display_name=display_name, + sub_label=sub_label, + ) + + +def _clear_cache_for_tests() -> None: + """Used only by the test suite to reset between cases.""" + + with FileProvider._cache_lock: + FileProvider._cache.clear() diff --git a/src/backend/src/controller/directory_providers/lakebase_provider.py b/src/backend/src/controller/directory_providers/lakebase_provider.py new file 
mode 100644 index 00000000..12b106b7 --- /dev/null +++ b/src/backend/src/controller/directory_providers/lakebase_provider.py @@ -0,0 +1,179 @@ +"""Postgres / Lakebase-backed directory provider. + +Reads principals from a Postgres table sitting on the app's primary +database (Lakebase in production, any Postgres in dev). The table +shape is: + + CREATE TABLE <table> ( + type TEXT NOT NULL, -- 'user' | 'group' + id TEXT NOT NULL, -- UPN/email for users, displayName for groups + display_name TEXT NOT NULL, + sub_label TEXT + ); + +The fully-qualified name (``catalog.schema.table`` or +``schema.table``) is stored in the ``DIRECTORY_LAKEBASE_TABLE`` +setting. We validate the identifier syntax at construction time to +avoid SQL injection via the table name; column values are always +passed as bind parameters. +""" + +from __future__ import annotations + +import re +from typing import Any, List + +from sqlalchemy import text + +from src.common.logging import get_logger +from src.controller.directory_providers.base import ( + DirectoryError, + DirectoryProvider, + DirectoryProviderConfig, + DirectoryProviderContext, +) +from src.models.directory import Principal, PrincipalType + +logger = get_logger(__name__) + + +# Postgres identifier rules we accept: alphanumeric + underscore + optional +# dotted parts. Anything else triggers DirectoryError so the table name +# never enters SQL untrusted. +_IDENT_PART = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$") + + +def _validate_fqn(fqn: str) -> str: + """Return ``fqn`` quoted segment-by-segment, or raise on invalid input. + + The setting is operator-supplied so we don't need to handle every + legal Postgres identifier (spaces, embedded dots, etc.) -- the + UI tells users to use ``catalog.schema.table`` or ``schema.table``. 
+ """ + + if not fqn: + raise DirectoryError("Lakebase table name is required") + parts = fqn.split(".") + if not (1 <= len(parts) <= 3): + raise DirectoryError( + f"Lakebase table FQN must have 1–3 dotted parts, got {len(parts)}" + ) + for part in parts: + if not _IDENT_PART.match(part): + raise DirectoryError( + f"Lakebase table FQN segment {part!r} is not a valid identifier" + ) + # Quote each part so reserved words still work; we already + # validated charset so the quoted form is safe. + return ".".join(f'"{p}"' for p in parts) + + +class LakebaseProvider(DirectoryProvider): + """Provider that reads principals from a Postgres table.""" + + def __init__( + self, + ctx: DirectoryProviderContext, + config: DirectoryProviderConfig, + ) -> None: + if ctx.db_engine is None: + raise DirectoryError("Database engine is required for Lakebase provider") + self._engine = ctx.db_engine + # Validate at construction time so misconfiguration is caught + # at startup / test rather than on the first search call. + self._table = _validate_fqn(config.lakebase_table or "") + + # ----- DirectoryProvider -------------------------------------------------- + + def search_users(self, prefix: str, top: int) -> List[Principal]: + return self._search("user", prefix, top) + + def search_groups(self, prefix: str, top: int) -> List[Principal]: + return self._search("group", prefix, top) + + def get_user(self, id: str) -> Principal: + return self._get("user", id) + + def get_group(self, id: str) -> Principal: + return self._get("group", id) + + def test(self) -> None: + # Cheapest possible probe: confirm the table exists and is readable. + # ``LIMIT 1`` keeps the row scan tiny. 
+ sql = text(f"SELECT 1 FROM {self._table} LIMIT 1") + try: + with self._engine.connect() as conn: + conn.execute(sql) + except Exception as exc: + raise DirectoryError(f"Lakebase test query failed: {exc}") from exc + + # ----- internals ---------------------------------------------------------- + + def _search(self, kind: str, prefix: str, top: int) -> List[Principal]: + if not prefix: + return [] + # ``LOWER(col) LIKE LOWER(:p)`` is case-insensitive on both + # Postgres and SQLite (ASCII) without needing dialect-specific + # ILIKE. The LIKE wildcards ``%`` / ``_`` in user input are + # escaped (with ``\`` as escape char) so a raw ``%`` doesn't + # open a directory dump. + safe_prefix = ( + prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + ) + sql = text( + f"SELECT type, id, display_name, sub_label " + f"FROM {self._table} " + f"WHERE type = :kind " + f" AND (" + f" LOWER(display_name) LIKE LOWER(:p) ESCAPE '\\' " + f" OR LOWER(id) LIKE LOWER(:p) ESCAPE '\\'" + f" ) " + f"ORDER BY display_name " + f"LIMIT :n" + ) + try: + with self._engine.connect() as conn: + rows = conn.execute( + sql, + {"kind": kind, "p": f"{safe_prefix}%", "n": int(top)}, + ).fetchall() + except Exception as exc: + raise DirectoryError(f"Lakebase search failed: {exc}") from exc + return [_row_to_principal(r) for r in rows] + + def _get(self, kind: str, id: str) -> Principal: + if not id: + raise DirectoryError(f"Empty {kind} id") + sql = text( + f"SELECT type, id, display_name, sub_label " + f"FROM {self._table} " + f"WHERE type = :kind AND id = :id " + f"LIMIT 1" + ) + try: + with self._engine.connect() as conn: + row = conn.execute(sql, {"kind": kind, "id": id}).fetchone() + except Exception as exc: + raise DirectoryError(f"Lakebase lookup failed: {exc}") from exc + if row is None: + raise DirectoryError(f"{kind.capitalize()} {id!r} not found") + return _row_to_principal(row) + + +def _row_to_principal(row: Any) -> Principal: + """Map a Postgres row to a Principal, 
tolerating raw types. + + SQLAlchemy ``Row`` objects support both name and index access; we + use names to keep the mapping explicit. + """ + + kind = (row.type or "").lower() + ptype = PrincipalType.USER if kind == "user" else ( + PrincipalType.GROUP if kind == "group" else PrincipalType.UNKNOWN + ) + return Principal( + type=ptype, + id=row.id or "", + display_name=row.display_name or row.id or "", + sub_label=row.sub_label or None, + ) diff --git a/src/backend/src/models/directory.py b/src/backend/src/models/directory.py index ae63c142..620140dd 100644 --- a/src/backend/src/models/directory.py +++ b/src/backend/src/models/directory.py @@ -1,8 +1,13 @@ """Pydantic API models for the Directory layer (external IdP lookups). The Directory layer is the generic abstraction over identity providers. -v1 ships one concrete provider (Microsoft Entra ID via Microsoft Graph), -but the manager / routes / models are provider-agnostic so future +v1 ships three concrete providers: + +- ``entra`` — Microsoft Entra ID via Microsoft Graph (UC HTTP Connection) +- ``lakebase`` — a Postgres table sitting in the app's Lakebase instance +- ``file`` — a local CSV file (primarily for tests and demos) + +The manager / routes / models stay provider-agnostic so future providers (Okta, Ping, ...) can be added without breaking changes. See plans/directory-lookup-and-principal-picker.md. @@ -61,18 +66,27 @@ class DirectoryProviderType(str, Enum): """ ENTRA = "entra" + LAKEBASE = "lakebase" + FILE = "file" class DirectoryStatus(BaseModel): """Reports whether the directory is wired up. - ``configured`` is True iff both a recognised provider type and a UC - HTTP connection name are persisted in settings. + ``configured`` is True iff the persisted provider type is one we + recognise *and* the provider-specific settings it requires are all + non-empty. This model carries the raw settings (not secrets) so + the Settings tab can hydrate its inputs without an extra round + trip. 
""" configured: bool provider_type: Optional[str] = None - connection_name: Optional[str] = None + # Provider-specific settings (most will be ``None`` depending on + # the configured provider_type). + connection_name: Optional[str] = None # entra + lakebase_table: Optional[str] = None # lakebase + file_path: Optional[str] = None # file class DirectoryTestResult(BaseModel): @@ -91,13 +105,19 @@ class DirectorySearchResponse(BaseModel): class DirectorySettingsUpdate(BaseModel): """Payload accepted by ``PUT /api/directory/settings``. - Either field may be ``None`` to clear that setting. + Any field may be ``None`` to clear that setting. Each provider + type cares about a different subset; the UI sends the full set + every time and the backend persists only the keys present. """ provider_type: Optional[str] = None connection_name: Optional[str] = None + lakebase_table: Optional[str] = None + file_path: Optional[str] = None # Setting keys (single source of truth) SETTING_KEY_PROVIDER_TYPE = "DIRECTORY_PROVIDER_TYPE" SETTING_KEY_CONNECTION_NAME = "DIRECTORY_UC_HTTP_CONNECTION_NAME" +SETTING_KEY_LAKEBASE_TABLE = "DIRECTORY_LAKEBASE_TABLE" +SETTING_KEY_FILE_PATH = "DIRECTORY_FILE_PATH" diff --git a/src/backend/src/routes/directory_routes.py b/src/backend/src/routes/directory_routes.py index 7cc620cc..35bd8e48 100644 --- a/src/backend/src/routes/directory_routes.py +++ b/src/backend/src/routes/directory_routes.py @@ -1,13 +1,16 @@ """Directory layer API: status, search, test, and provider-agnostic settings. Settings keys live in the existing ``app_settings`` key/value table so -no Alembic migration is required. All Graph traffic flows through a UC -HTTP Connection; the app never holds a client secret or token. +no Alembic migration is required. 
v1 ships three providers: + +- ``entra`` — Microsoft Entra ID via Microsoft Graph (UC HTTP Connection) +- ``lakebase`` — A Postgres table on the app's primary Lakebase database +- ``file`` — A local CSV file (tests / demos) See plans/directory-lookup-and-principal-picker.md. """ -from typing import List, Optional +from typing import Any, List, Optional from fastapi import APIRouter, Depends, Query, Request from sqlalchemy.orm import Session @@ -18,13 +21,18 @@ from src.common.features import FeatureAccessLevel from src.common.logging import get_logger from src.common.uc_connections import list_http_connections -from src.controller.directory_providers import DirectoryError +from src.controller.directory_providers import ( + DirectoryError, + DirectoryProviderContext, +) from src.models.directory import ( DirectorySearchResponse, DirectorySettingsUpdate, DirectoryStatus, DirectoryTestResult, SETTING_KEY_CONNECTION_NAME, + SETTING_KEY_FILE_PATH, + SETTING_KEY_LAKEBASE_TABLE, SETTING_KEY_PROVIDER_TYPE, ) from src.repositories.app_settings_repository import app_settings_repo @@ -34,6 +42,26 @@ router = APIRouter(prefix="/api/directory", tags=["Directory"]) +def _build_context(request: Request, db: Session) -> DirectoryProviderContext: + """Assemble the per-request provider context. + + Each provider reads only the transport handles it cares about: + Entra needs ``ws_client``, Lakebase needs ``db_engine``, File + needs neither. + """ + + ws_client: Any = None + try: + from src.common.workspace_client import get_obo_workspace_client + + ws_client = get_obo_workspace_client(request) + except Exception: + ws_client = None + + db_engine = db.get_bind() if db is not None else None + return DirectoryProviderContext(ws_client=ws_client, db_engine=db_engine) + + @router.get("/status", response_model=DirectoryStatus) async def get_status( manager: DirectoryManagerDep, @@ -68,20 +96,12 @@ async def search( picker's unconfigured mode handles the UX from there. 
""" - from src.common.workspace_client import get_obo_workspace_client - parsed_types: List[str] = [] if types: parsed_types = [t.strip() for t in types.split(",") if t.strip()] + ctx = _build_context(request, db) try: - ws = get_obo_workspace_client(request) - except Exception: - # The picker is expected to degrade gracefully; treat workspace - # client failure the same as an empty result. - return DirectorySearchResponse(results=[]) - - try: - results = manager.search(db, ws, query=q, types=parsed_types, limit=limit) + results = manager.search(db, ctx, query=q, types=parsed_types, limit=limit) except DirectoryError as exc: logger.warning(f"Directory search failed: {exc}") return DirectorySearchResponse(results=[]) @@ -97,15 +117,9 @@ async def test( ) -> DirectoryTestResult: """Probe the configured provider; surfaces a typed success/error to the UI.""" - from src.common.workspace_client import get_obo_workspace_client - - try: - ws = get_obo_workspace_client(request) - except Exception as exc: - return DirectoryTestResult(healthy=False, error=f"Workspace client error: {exc}") - + ctx = _build_context(request, db) try: - manager.test(db, ws) + manager.test(db, ctx) except DirectoryError as exc: return DirectoryTestResult(healthy=False, error=str(exc)) except Exception as exc: @@ -121,17 +135,20 @@ async def update_settings( db: Session = Depends(get_db), _: bool = Depends(PermissionChecker("settings", FeatureAccessLevel.READ_WRITE)), ) -> DirectoryStatus: - """Persist provider type and/or connection name, then invalidate cache. + """Persist any directory settings supplied, then invalidate cache. - Either field may be ``None`` (or empty string) to clear that - setting. Caller passes both for full updates; passing just one is - an "edit one field" shortcut. + The caller passes the full set on save; missing fields are left + untouched. Pass an explicit empty string to clear a setting. 
""" if body.provider_type is not None: app_settings_repo.set(db, SETTING_KEY_PROVIDER_TYPE, body.provider_type or None) if body.connection_name is not None: app_settings_repo.set(db, SETTING_KEY_CONNECTION_NAME, body.connection_name or None) + if body.lakebase_table is not None: + app_settings_repo.set(db, SETTING_KEY_LAKEBASE_TABLE, body.lakebase_table or None) + if body.file_path is not None: + app_settings_repo.set(db, SETTING_KEY_FILE_PATH, body.file_path or None) manager.invalidate_cache() return manager.get_status(db) diff --git a/src/backend/src/tests/unit/test_directory_manager.py b/src/backend/src/tests/unit/test_directory_manager.py index 18bfc962..47508110 100644 --- a/src/backend/src/tests/unit/test_directory_manager.py +++ b/src/backend/src/tests/unit/test_directory_manager.py @@ -15,11 +15,18 @@ register_provider, unregister_provider, ) -from src.controller.directory_providers import DirectoryError, DirectoryProvider +from src.controller.directory_providers import ( + DirectoryError, + DirectoryProvider, + DirectoryProviderConfig, + DirectoryProviderContext, +) from src.models.directory import ( Principal, PrincipalType, SETTING_KEY_CONNECTION_NAME, + SETTING_KEY_FILE_PATH, + SETTING_KEY_LAKEBASE_TABLE, SETTING_KEY_PROVIDER_TYPE, ) @@ -27,9 +34,9 @@ class _StubProvider(DirectoryProvider): """Test double; lets us prove the abstraction is enough on its own.""" - def __init__(self, ws_client, connection_name): - self.ws = ws_client - self.connection_name = connection_name + def __init__(self, ctx: DirectoryProviderContext, config: DirectoryProviderConfig): + self.ctx = ctx + self.config = config self.search_users_calls = 0 self.search_groups_calls = 0 self.test_calls = 0 @@ -54,18 +61,26 @@ def test(self): self.test_calls += 1 +def _stub_ctx() -> DirectoryProviderContext: + return DirectoryProviderContext(ws_client=MagicMock(), db_engine=MagicMock()) + + @pytest.fixture def stub_registered(): - """Register a 'stub' provider for the duration of the 
test.""" + """Register a 'stub' provider for the duration of the test. + + The fixture's job is teardown; tests that need a seeded stub + instance re-register a counting factory inside the test body. + """ instances: List[_StubProvider] = [] - def factory(ws_client, connection_name): - inst = _StubProvider(ws_client, connection_name) + def factory(ctx, config): + inst = _StubProvider(ctx, config) instances.append(inst) return inst - register_provider("stub", factory) + register_provider("stub", factory, required_keys=(SETTING_KEY_CONNECTION_NAME,)) try: yield instances finally: @@ -116,154 +131,173 @@ def test_configured_when_provider_recognised(self, db_with_settings, stub_regist status = DirectoryManager().get_status(db_with_settings) assert status.configured is True + def test_status_exposes_per_provider_settings(self, db_with_settings): + # All three provider-specific fields make it back into the + # status payload (for the Settings tab to hydrate). + with _patch_settings({ + SETTING_KEY_PROVIDER_TYPE: "file", + SETTING_KEY_FILE_PATH: "/tmp/principals.csv", + SETTING_KEY_LAKEBASE_TABLE: "main.directory.principals", + SETTING_KEY_CONNECTION_NAME: "my-graph", + }): + status = DirectoryManager().get_status(db_with_settings) + assert status.file_path == "/tmp/principals.csv" + assert status.lakebase_table == "main.directory.principals" + assert status.connection_name == "my-graph" + # Only the active provider's required key gates "configured". + assert status.configured is True + + def test_status_unconfigured_when_required_setting_missing(self, db_with_settings): + with _patch_settings({ + SETTING_KEY_PROVIDER_TYPE: "file", + # No FILE_PATH set. 
+ }): + status = DirectoryManager().get_status(db_with_settings) + assert status.configured is False + assert status.provider_type == "file" + class TestSearch: def test_empty_when_not_configured(self, db_with_settings): with _patch_settings({}): - results = DirectoryManager().search(db_with_settings, MagicMock(), query="a", types=["user"]) + results = DirectoryManager().search( + db_with_settings, _stub_ctx(), query="a", types=["user"], + ) assert results == [] - def test_dispatches_to_registered_provider(self, db_with_settings, stub_registered): - with _patch_settings({ - SETTING_KEY_PROVIDER_TYPE: "stub", - SETTING_KEY_CONNECTION_NAME: "conn", - }): - mgr = DirectoryManager() - # Pre-arm the next stub instance via the factory side-effect. - # We have to call search first so the instance exists; arrange - # the data on the next-created instance. - captured = stub_registered - - # Trick: monkey-patch the factory to return a pre-seeded stub. - def seeded_factory(ws_client, connection_name): - inst = _StubProvider(ws_client, connection_name) - inst.next_users = [ - Principal(type=PrincipalType.USER, id="alice@x", display_name="Alice", sub_label="alice@x"), - ] - captured.append(inst) - return inst - - register_provider("stub", seeded_factory) - results = mgr.search(db_with_settings, MagicMock(), query="ali", types=["user"]) + def test_dispatches_to_registered_provider(self, db_with_settings): + seeded: List[_StubProvider] = [] + + def factory(ctx, config): + inst = _StubProvider(ctx, config) + inst.next_users = [ + Principal(type=PrincipalType.USER, id="alice@x", display_name="Alice", sub_label="alice@x"), + ] + seeded.append(inst) + return inst + register_provider("stub", factory, required_keys=(SETTING_KEY_CONNECTION_NAME,)) + try: + with _patch_settings({ + SETTING_KEY_PROVIDER_TYPE: "stub", + SETTING_KEY_CONNECTION_NAME: "conn", + }): + results = DirectoryManager().search( + db_with_settings, _stub_ctx(), query="ali", types=["user"], + ) + finally: + 
unregister_provider("stub") assert [(p.type, p.id) for p in results] == [(PrincipalType.USER, "alice@x")] - def test_cache_hits_on_second_call(self, db_with_settings, stub_registered): - # Replace factory with a counting one + def test_cache_hits_on_second_call(self, db_with_settings): created = [] - def factory(ws_client, connection_name): - stub = _StubProvider(ws_client, connection_name) + def factory(ctx, config): + stub = _StubProvider(ctx, config) stub.next_users = [ Principal(type=PrincipalType.USER, id="alice@x", display_name="Alice", sub_label="alice@x"), ] created.append(stub) return stub - register_provider("stub", factory) + register_provider("stub", factory, required_keys=(SETTING_KEY_CONNECTION_NAME,)) try: with _patch_settings({ SETTING_KEY_PROVIDER_TYPE: "stub", SETTING_KEY_CONNECTION_NAME: "conn", }): mgr = DirectoryManager() - mgr.search(db_with_settings, MagicMock(), query="ali", types=["user"]) - mgr.search(db_with_settings, MagicMock(), query="ali", types=["user"]) - mgr.search(db_with_settings, MagicMock(), query="ALI", types=["user"]) # case-insensitive - mgr.search(db_with_settings, MagicMock(), query=" ali ", types=["user"]) # whitespace - # Provider instances are cheap to create; what we care about - # is that the underlying search_users was only called once. 
+ mgr.search(db_with_settings, _stub_ctx(), query="ali", types=["user"]) + mgr.search(db_with_settings, _stub_ctx(), query="ali", types=["user"]) + mgr.search(db_with_settings, _stub_ctx(), query="ALI", types=["user"]) + mgr.search(db_with_settings, _stub_ctx(), query=" ali ", types=["user"]) assert sum(s.search_users_calls for s in created) == 1 finally: unregister_provider("stub") - def test_cache_invalidates_when_settings_change(self, db_with_settings, stub_registered): + def test_cache_invalidates_when_settings_change(self, db_with_settings): created = [] - def factory(ws_client, connection_name): - stub = _StubProvider(ws_client, connection_name) + def factory(ctx, config): + stub = _StubProvider(ctx, config) stub.next_users = [ - Principal(type=PrincipalType.USER, id=f"u@{connection_name}", display_name="U", sub_label=None), + Principal(type=PrincipalType.USER, id=f"u@{config.connection_name}", display_name="U", sub_label=None), ] created.append(stub) return stub - register_provider("stub", factory) + register_provider("stub", factory, required_keys=(SETTING_KEY_CONNECTION_NAME,)) try: mgr = DirectoryManager() - # First settings with _patch_settings({ SETTING_KEY_PROVIDER_TYPE: "stub", SETTING_KEY_CONNECTION_NAME: "conn-A", }): - mgr.search(db_with_settings, MagicMock(), query="a", types=["user"]) - # Different connection name => same query should re-hit provider + mgr.search(db_with_settings, _stub_ctx(), query="a", types=["user"]) with _patch_settings({ SETTING_KEY_PROVIDER_TYPE: "stub", SETTING_KEY_CONNECTION_NAME: "conn-B", }): - mgr.search(db_with_settings, MagicMock(), query="a", types=["user"]) + mgr.search(db_with_settings, _stub_ctx(), query="a", types=["user"]) assert sum(s.search_users_calls for s in created) == 2 finally: unregister_provider("stub") - def test_explicit_invalidate_drops_cache(self, db_with_settings, stub_registered): + def test_explicit_invalidate_drops_cache(self, db_with_settings): created = [] - def factory(ws_client, 
connection_name): - stub = _StubProvider(ws_client, connection_name) + def factory(ctx, config): + stub = _StubProvider(ctx, config) stub.next_users = [ Principal(type=PrincipalType.USER, id="x@x", display_name="X", sub_label=None), ] created.append(stub) return stub - register_provider("stub", factory) + register_provider("stub", factory, required_keys=(SETTING_KEY_CONNECTION_NAME,)) try: mgr = DirectoryManager() with _patch_settings({ SETTING_KEY_PROVIDER_TYPE: "stub", SETTING_KEY_CONNECTION_NAME: "conn", }): - mgr.search(db_with_settings, MagicMock(), query="a", types=["user"]) + mgr.search(db_with_settings, _stub_ctx(), query="a", types=["user"]) mgr.invalidate_cache() - mgr.search(db_with_settings, MagicMock(), query="a", types=["user"]) + mgr.search(db_with_settings, _stub_ctx(), query="a", types=["user"]) assert sum(s.search_users_calls for s in created) == 2 finally: unregister_provider("stub") def test_types_filter_narrows_calls(self, db_with_settings, stub_registered): - created = [] - - def factory(ws_client, connection_name): - stub = _StubProvider(ws_client, connection_name) - created.append(stub) - return stub - - register_provider("stub", factory) - try: - mgr = DirectoryManager() - with _patch_settings({ - SETTING_KEY_PROVIDER_TYPE: "stub", - SETTING_KEY_CONNECTION_NAME: "conn", - }): - mgr.search(db_with_settings, MagicMock(), query="x", types=["user"]) - assert sum(s.search_users_calls for s in created) == 1 - assert sum(s.search_groups_calls for s in created) == 0 - finally: - unregister_provider("stub") + with _patch_settings({ + SETTING_KEY_PROVIDER_TYPE: "stub", + SETTING_KEY_CONNECTION_NAME: "conn", + }): + DirectoryManager().search( + db_with_settings, _stub_ctx(), query="x", types=["user"], + ) + # The fixture registered a counting factory; verify it ran. 
+ assert sum(s.search_users_calls for s in stub_registered) == 1 + assert sum(s.search_groups_calls for s in stub_registered) == 0 class TestTestProbe: def test_raises_when_unconfigured(self, db_with_settings): with _patch_settings({}): with pytest.raises(DirectoryError, match="not configured"): - DirectoryManager().test(db_with_settings, MagicMock()) + DirectoryManager().test(db_with_settings, _stub_ctx()) + + def test_raises_when_required_key_missing(self, db_with_settings, stub_registered): + # Provider registered but its required setting (connection_name) + # is absent => clear error message. + with _patch_settings({SETTING_KEY_PROVIDER_TYPE: "stub"}): + with pytest.raises(DirectoryError, match="missing required"): + DirectoryManager().test(db_with_settings, _stub_ctx()) def test_dispatches_to_provider(self, db_with_settings, stub_registered): with _patch_settings({ SETTING_KEY_PROVIDER_TYPE: "stub", SETTING_KEY_CONNECTION_NAME: "conn", }): - DirectoryManager().test(db_with_settings, MagicMock()) + DirectoryManager().test(db_with_settings, _stub_ctx()) # If we got here, dispatch worked (StubProvider.test() is a no-op). 
diff --git a/src/backend/src/tests/unit/test_entra_id_provider.py b/src/backend/src/tests/unit/test_entra_id_provider.py index b8642ab5..ca419618 100644 --- a/src/backend/src/tests/unit/test_entra_id_provider.py +++ b/src/backend/src/tests/unit/test_entra_id_provider.py @@ -13,6 +13,8 @@ from src.controller.directory_providers import ( DirectoryError, + DirectoryProviderConfig, + DirectoryProviderContext, EntraIdProvider, ) from src.controller.directory_providers.entra_id_provider import ( @@ -39,6 +41,15 @@ def _ws_returning(payload): return ws +def _make_provider(ws, *, connection_name: str = "my-graph") -> EntraIdProvider: + """Build an EntraIdProvider via the (ctx, config) factory contract.""" + + return EntraIdProvider( + DirectoryProviderContext(ws_client=ws), + DirectoryProviderConfig(connection_name=connection_name), + ) + + class TestOdataEscaping: def test_doubles_single_quote(self): assert _escape_odata("O'Brien") == "O''Brien" @@ -79,7 +90,7 @@ def test_maps_userPrincipalName_to_id(self): {"id": "guid-1", "displayName": "Alice", "userPrincipalName": "alice@contoso.com", "mail": "alice@contoso.com"}, ] }) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) results = provider.search_users("ali", top=20) assert len(results) == 1 p = results[0] @@ -91,7 +102,7 @@ def test_maps_userPrincipalName_to_id(self): def test_escapes_quote_in_query(self): ws = _ws_returning({"value": []}) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) provider.search_users("O'Brien", top=20) call = ws.serving_endpoints.http_request.call_args path = call.kwargs["path"] @@ -102,7 +113,7 @@ def test_escapes_quote_in_query(self): def test_uses_select_projection(self): ws = _ws_returning({"value": []}) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) provider.search_users("a", top=5) path = ws.serving_endpoints.http_request.call_args.kwargs["path"] assert 
"$select=id,displayName,userPrincipalName,mail" in path @@ -110,7 +121,7 @@ def test_uses_select_projection(self): def test_empty_query_short_circuits(self): ws = MagicMock() - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) assert provider.search_users("", top=20) == [] ws.serving_endpoints.http_request.assert_not_called() @@ -118,7 +129,7 @@ def test_falls_back_to_mail_when_no_upn(self): ws = _ws_returning({ "value": [{"id": "guid-2", "displayName": "Bob", "mail": "bob@x"}] }) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) p = provider.search_users("b", top=20)[0] assert p.id == "bob@x" @@ -130,7 +141,7 @@ def test_maps_displayName_to_id(self): {"id": "group-guid", "displayName": "Data Producers", "description": "all DPs"}, ] }) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) results = provider.search_groups("Data", top=20) assert len(results) == 1 p = results[0] @@ -141,7 +152,7 @@ def test_maps_displayName_to_id(self): def test_uses_group_select_projection(self): ws = _ws_returning({"value": []}) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) provider.search_groups("X", top=20) path = ws.serving_endpoints.http_request.call_args.kwargs["path"] assert "$select=id,displayName,description" in path @@ -150,7 +161,7 @@ def test_uses_group_select_projection(self): class TestTest: def test_happy_path(self): ws = _ws_returning({"value": [{"id": "guid"}]}) - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) provider.test() # no exception path = ws.serving_endpoints.http_request.call_args.kwargs["path"] assert path.startswith("/v1.0/users") @@ -158,24 +169,24 @@ def test_happy_path(self): def test_raises_on_graph_error_body(self): ws = _ws_returning({"error": {"code": "InvalidAuthenticationToken", "message": "Access token is empty."}}) - provider = 
EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) with pytest.raises(DirectoryError, match="InvalidAuthenticationToken"): provider.test() def test_raises_on_transport_error(self): ws = MagicMock() ws.serving_endpoints.http_request.side_effect = RuntimeError("connection refused") - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) with pytest.raises(DirectoryError, match="connection refused"): provider.test() def test_raises_on_non_json_body(self): ws = MagicMock() ws.serving_endpoints.http_request.return_value = _stub_response("not json at all") - provider = EntraIdProvider(ws, connection_name="my-graph") + provider = _make_provider(ws) with pytest.raises(DirectoryError, match="non-JSON"): provider.test() def test_raises_on_empty_connection_name(self): with pytest.raises(DirectoryError): - EntraIdProvider(MagicMock(), connection_name="") + _make_provider(MagicMock(), connection_name="") diff --git a/src/backend/src/tests/unit/test_file_provider.py b/src/backend/src/tests/unit/test_file_provider.py new file mode 100644 index 00000000..cdc0606e --- /dev/null +++ b/src/backend/src/tests/unit/test_file_provider.py @@ -0,0 +1,193 @@ +"""Unit tests for FileProvider. + +Writes real CSV files to a temp dir so the parser, mtime cache, and +DictReader path get exercised end-to-end. 
+""" + +import os +import textwrap +import time +from pathlib import Path + +import pytest + +from src.controller.directory_providers import ( + DirectoryError, + DirectoryProviderConfig, + DirectoryProviderContext, + FileProvider, +) +from src.controller.directory_providers.file_provider import _clear_cache_for_tests +from src.models.directory import PrincipalType + + +@pytest.fixture(autouse=True) +def _reset_file_cache(): + """The provider's file cache is class-level; reset between tests.""" + + _clear_cache_for_tests() + yield + _clear_cache_for_tests() + + +def _write_csv(tmp_path: Path, body: str, name: str = "principals.csv") -> str: + path = tmp_path / name + path.write_text(textwrap.dedent(body).lstrip(), encoding="utf-8") + return str(path) + + +def _make_provider(path: str) -> FileProvider: + return FileProvider( + DirectoryProviderContext(), + DirectoryProviderConfig(file_path=path), + ) + + +class TestParsing: + def test_loads_users_and_groups(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,alice@example.com,Alice Liddell,alice@example.com + user,bob@example.com,Bob Builder,bob@example.com + group,Producers,Data Producers,producers-guid + """) + provider = _make_provider(path) + users = provider.search_users("a", top=20) + groups = provider.search_groups("data", top=20) + assert [p.id for p in users] == ["alice@example.com"] + assert [p.display_name for p in groups] == ["Data Producers"] + assert groups[0].type == PrincipalType.GROUP + + def test_falls_back_display_name_to_id(self, tmp_path: Path): + # display_name column present but empty for one row -> falls + # back to the id so the picker can still render a badge. 
+ path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,onlyid@example.com,, + """) + provider = _make_provider(path) + results = provider.search_users("only", top=20) + assert results[0].display_name == "onlyid@example.com" + + def test_skips_blank_rows(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + ,,, + user,alice@example.com,Alice,alice@example.com + ,,, + """) + provider = _make_provider(path) + assert len(provider.search_users("a", top=20)) == 1 + + def test_rejects_unknown_type(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + robot,alice@example.com,Alice,alice@example.com + """) + provider = _make_provider(path) + with pytest.raises(DirectoryError, match="must be 'user' or 'group'"): + provider.search_users("a", top=20) + + def test_rejects_missing_required_column(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id + user,alice@example.com + """) + provider = _make_provider(path) + with pytest.raises(DirectoryError, match="missing required columns"): + provider.search_users("a", top=20) + + def test_rejects_blank_id(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,,Alice,alice@example.com + """) + provider = _make_provider(path) + with pytest.raises(DirectoryError, match="id is required"): + provider.search_users("a", top=20) + + +class TestSearch: + def test_top_caps_results(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,a1@x,Alice 1, + user,a2@x,Alice 2, + user,a3@x,Alice 3, + """) + provider = _make_provider(path) + assert len(provider.search_users("a", top=2)) == 2 + + def test_prefix_search_is_case_insensitive(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,alice@example.com,Alice Liddell, + """) + provider = _make_provider(path) + assert len(provider.search_users("ALI", top=20)) == 1 + + 
def test_search_matches_against_id_too(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,bob.builder@example.com,Robert Builder, + """) + provider = _make_provider(path) + assert len(provider.search_users("bob", top=20)) == 1 + + def test_empty_query_short_circuits(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,alice@example.com,Alice, + """) + provider = _make_provider(path) + assert provider.search_users("", top=20) == [] + + +class TestCache: + def test_re_reads_when_mtime_advances(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,alice@example.com,Alice, + """) + provider = _make_provider(path) + assert len(provider.search_users("a", top=20)) == 1 + # Overwrite with two principals and bump mtime explicitly. + Path(path).write_text(textwrap.dedent(""" + type,id,display_name,sub_label + user,alice@example.com,Alice, + user,andre@example.com,Andre, + """).lstrip(), encoding="utf-8") + os.utime(path, (time.time() + 5, time.time() + 5)) + assert len(provider.search_users("a", top=20)) == 2 + + +class TestProbe: + def test_test_succeeds_for_valid_file(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + type,id,display_name,sub_label + user,alice@example.com,Alice, + """) + _make_provider(path).test() + + def test_test_fails_for_missing_file(self, tmp_path: Path): + provider = _make_provider(str(tmp_path / "does-not-exist.csv")) + with pytest.raises(DirectoryError, match="not found"): + provider.test() + + def test_test_fails_for_malformed_csv(self, tmp_path: Path): + path = _write_csv(tmp_path, """ + no,header,here + user,foo,bar + """) + provider = _make_provider(path) + with pytest.raises(DirectoryError): + provider.test() + + +class TestConstruction: + def test_requires_file_path(self): + with pytest.raises(DirectoryError, match="required"): + FileProvider( + DirectoryProviderContext(), + 
DirectoryProviderConfig(file_path=""), + ) diff --git a/src/backend/src/tests/unit/test_lakebase_provider.py b/src/backend/src/tests/unit/test_lakebase_provider.py new file mode 100644 index 00000000..c9014eeb --- /dev/null +++ b/src/backend/src/tests/unit/test_lakebase_provider.py @@ -0,0 +1,180 @@ +"""Unit tests for LakebaseProvider. + +Uses an in-memory SQLite engine so the tests exercise real SQLAlchemy +text() bindings (escaping, parameterisation) rather than a mocked +connection. SQLite's ``LIKE`` is case-insensitive by default for ASCII, +which matches the case-insensitive semantics we want from Postgres +``ILIKE`` for the search assertions here. +""" + +import pytest +from sqlalchemy import create_engine, text + +from src.controller.directory_providers import ( + DirectoryError, + DirectoryProviderConfig, + DirectoryProviderContext, + LakebaseProvider, +) +from src.controller.directory_providers.lakebase_provider import _validate_fqn +from src.models.directory import PrincipalType + + +@pytest.fixture +def engine_with_principals(): + """Build an in-memory engine with a populated principals table.""" + + engine = create_engine("sqlite:///:memory:") + with engine.begin() as conn: + conn.execute( + text( + 'CREATE TABLE principals (' + '"type" TEXT NOT NULL,' + '"id" TEXT NOT NULL,' + '"display_name" TEXT NOT NULL,' + '"sub_label" TEXT' + ')' + ) + ) + conn.execute( + text( + 'INSERT INTO principals (type, id, display_name, sub_label) ' + "VALUES " + "('user', 'alice@example.com', 'Alice Liddell', 'alice@example.com')," + "('user', 'bob@example.com', 'Bob Builder', 'bob@example.com')," + "('user', 'amelia@example.com','Amelia Earhart','amelia@example.com')," + "('group', 'Producers', 'Data Producers', 'producers-guid')," + "('group', 'Stewards', 'Data Stewards', 'stewards-guid')" + ) + ) + return engine + + +def _make_provider(engine, table: str = "principals") -> LakebaseProvider: + return LakebaseProvider( + DirectoryProviderContext(db_engine=engine), + 
DirectoryProviderConfig(lakebase_table=table), + ) + + +class TestFqnValidation: + def test_accepts_single_part(self): + assert _validate_fqn("principals") == '"principals"' + + def test_accepts_three_parts(self): + assert _validate_fqn("main.directory.principals") == \ + '"main"."directory"."principals"' + + def test_rejects_empty(self): + with pytest.raises(DirectoryError): + _validate_fqn("") + + def test_rejects_too_many_parts(self): + with pytest.raises(DirectoryError): + _validate_fqn("a.b.c.d") + + def test_rejects_sql_injection_attempt(self): + # Hyphens, quotes, semicolons, spaces all rejected at the + # identifier-segment level. + for bad in ( + "principals; DROP TABLE principals", + 'evil"', + "with-dash", + "with space", + "1starts_with_digit", + ): + with pytest.raises(DirectoryError): + _validate_fqn(bad) + + +class TestSearch: + def test_prefix_match_against_display_name(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + results = provider.search_users("ali", top=20) + ids = sorted(p.id for p in results) + assert ids == ["alice@example.com"] + + def test_prefix_match_against_id(self, engine_with_principals): + # ``bo`` matches Bob's id (bob@…) but not Amelia's display_name. 
+ provider = _make_provider(engine_with_principals) + results = provider.search_users("bo", top=20) + assert [p.id for p in results] == ["bob@example.com"] + + def test_returns_principal_with_full_shape(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + p = provider.search_users("alice", top=20)[0] + assert p.type == PrincipalType.USER + assert p.id == "alice@example.com" + assert p.display_name == "Alice Liddell" + assert p.sub_label == "alice@example.com" + + def test_search_groups_filters_by_type(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + groups = provider.search_groups("data", top=20) + ids = sorted(p.id for p in groups) + assert ids == ["Producers", "Stewards"] + # All returned principals must be groups. + assert {p.type for p in groups} == {PrincipalType.GROUP} + + def test_top_caps_results(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + # All three users start with 'a' or 'b'; 'a' alone matches alice + amelia. + results = provider.search_users("a", top=1) + assert len(results) == 1 + + def test_empty_query_short_circuits(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + assert provider.search_users("", top=20) == [] + + def test_wildcard_in_input_is_escaped(self, engine_with_principals): + # Without escaping, '%' would expand to "match anything" and + # this query would return rows. With escaping, the literal '%' + # is sought and nothing matches. 
+ provider = _make_provider(engine_with_principals) + assert provider.search_users("%", top=20) == [] + assert provider.search_users("_", top=20) == [] + + +class TestGet: + def test_get_user_returns_principal(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + p = provider.get_user("alice@example.com") + assert p.display_name == "Alice Liddell" + + def test_get_user_raises_when_missing(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + with pytest.raises(DirectoryError, match="not found"): + provider.get_user("nobody@example.com") + + def test_get_user_empty_id_raises(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + with pytest.raises(DirectoryError): + provider.get_user("") + + +class TestProbe: + def test_test_succeeds_when_table_exists(self, engine_with_principals): + provider = _make_provider(engine_with_principals) + provider.test() # no exception + + def test_test_fails_when_table_absent(self): + engine = create_engine("sqlite:///:memory:") + provider = _make_provider(engine, table="missing_table") + with pytest.raises(DirectoryError, match="failed"): + provider.test() + + +class TestConstruction: + def test_requires_db_engine(self): + with pytest.raises(DirectoryError, match="engine"): + LakebaseProvider( + DirectoryProviderContext(), + DirectoryProviderConfig(lakebase_table="principals"), + ) + + def test_requires_table_name(self, engine_with_principals): + with pytest.raises(DirectoryError, match="required"): + LakebaseProvider( + DirectoryProviderContext(db_engine=engine_with_principals), + DirectoryProviderConfig(lakebase_table=""), + ) diff --git a/src/frontend/src/types/directory.ts b/src/frontend/src/types/directory.ts index 91006247..6eacb5d1 100644 --- a/src/frontend/src/types/directory.ts +++ b/src/frontend/src/types/directory.ts @@ -28,7 +28,10 @@ export interface Principal { export interface DirectoryStatus { configured: boolean; provider_type: 
string | null; + /** Provider-specific config; which of these fields are null depends on provider_type. */ connection_name: string | null; + lakebase_table?: string | null; + file_path?: string | null; } export interface DirectoryTestResult { @@ -43,6 +46,8 @@ export interface DirectorySearchResponse { export interface DirectorySettingsUpdate { provider_type?: string | null; connection_name?: string | null; + lakebase_table?: string | null; + file_path?: string | null; } export interface UcHttpConnection { @@ -59,5 +64,5 @@ export interface UcHttpConnection { * additional entries here will be rendered disabled in the Settings * tab to telegraph the abstraction. */ -export const DIRECTORY_PROVIDER_TYPES = ['entra'] as const; +export const DIRECTORY_PROVIDER_TYPES = ['entra', 'lakebase', 'file'] as const; export type DirectoryProviderType = (typeof DIRECTORY_PROVIDER_TYPES)[number]; diff --git a/src/frontend/src/views/settings-directory.tsx b/src/frontend/src/views/settings-directory.tsx index 6b3fa067..bbca85c3 100644 --- a/src/frontend/src/views/settings-directory.tsx +++ b/src/frontend/src/views/settings-directory.tsx @@ -1,22 +1,22 @@ /** * Settings → Integrations → Directory. * - * Configures the Directory abstraction (PRD #335). v1 ships one - * concrete provider (Microsoft Entra ID via Microsoft Graph). The - * provider Select renders future providers visible-but-disabled so - * the abstraction is telegraphed to the user. - * - * All Graph traffic goes through a UC HTTP Connection so the app - * never holds a client secret or token cache. The Test button hits - * POST /api/directory/test which surfaces auth / connectivity errors. + * Configures the Directory abstraction (PRD #335). v1 ships three + * concrete providers; the dropdown enables all three and the panel + * below the provider Select switches to the provider-specific inputs + * and help block on the fly.
All Directory traffic flows through the + * provider's transport of choice (UC HTTP Connection for Entra; the + * app's own Lakebase DB for Lakebase; a local CSV for File) so the + * app never holds a client secret. */ -import { useEffect, useMemo, useState } from 'react'; +import { ReactNode, useEffect, useMemo, useState } from 'react'; import { Loader2, Plug2 } from 'lucide-react'; import SettingsPageWrapper from '@/components/settings/settings-page-wrapper'; import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert'; import { Button } from '@/components/ui/button'; +import { Input } from '@/components/ui/input'; import { Label } from '@/components/ui/label'; import { Select, @@ -35,17 +35,16 @@ import type { UcHttpConnection, } from '@/types/directory'; -// Provider options. Only `entra` is enabled in v1; the others render -// disabled so the abstraction is visible (matches the plan). +// Provider options enabled in v1. Adding a new one means extending +// this array, the form state, and the providerPanel switch below; the +// backend manager picks the provider up via its registry.
const PROVIDER_OPTIONS: Array<{ - value: string; + value: 'entra' | 'lakebase' | 'file'; label: string; - disabled: boolean; - helpKey?: string; }> = [ - { value: 'entra', label: 'Microsoft Entra ID', disabled: false, helpKey: 'entra' }, - { value: 'okta', label: 'Okta (coming soon)', disabled: true }, - { value: 'ping', label: 'Ping (coming soon)', disabled: true }, + { value: 'entra', label: 'Microsoft Entra ID' }, + { value: 'lakebase', label: 'Lakebase table' }, + { value: 'file', label: 'CSV file (test / demo)' }, ]; const ENTRA_HELP_LINES = [ @@ -55,6 +54,20 @@ const ENTRA_HELP_LINES = [ ['Grant type', 'client_credentials'], ] as const; +const LAKEBASE_SCHEMA_SQL = `CREATE TABLE main.directory.principals ( + type TEXT NOT NULL, -- 'user' | 'group' + id TEXT NOT NULL, -- UPN/email for users, displayName for groups + display_name TEXT NOT NULL, + sub_label TEXT +); +CREATE INDEX ON main.directory.principals (LOWER(display_name)); +CREATE INDEX ON main.directory.principals (LOWER(id));`; + +const FILE_HELP_CSV = `type,id,display_name,sub_label +user,alice@example.com,Alice Liddell,alice@example.com +user,bob@example.com,Bob Builder,bob@example.com +group,Producers,Data Producers,producers-guid`; + export default function SettingsDirectoryView() { const { get, put, post } = useApi(); const { toast } = useToast(); @@ -63,8 +76,15 @@ export default function SettingsDirectoryView() { const [loading, setLoading] = useState(true); const [saving, setSaving] = useState(false); const [testing, setTesting] = useState(false); + + // Form state. Each provider only reads the field it cares about, + // but we keep all three around so switching providers preserves + // previously-entered values. 
const [providerType, setProviderType] = useState(''); const [connectionName, setConnectionName] = useState(''); + const [lakebaseTable, setLakebaseTable] = useState(''); + const [filePath, setFilePath] = useState(''); + const [status, setStatus] = useState(null); const [connections, setConnections] = useState([]); const [connectionsLoading, setConnectionsLoading] = useState(false); @@ -89,6 +109,8 @@ export default function SettingsDirectoryView() { setStatus(statusRes.data); setProviderType(statusRes.data.provider_type ?? ''); setConnectionName(statusRes.data.connection_name ?? ''); + setLakebaseTable(statusRes.data.lakebase_table ?? ''); + setFilePath(statusRes.data.file_path ?? ''); } if (connsRes.data && Array.isArray(connsRes.data)) { setConnections(connsRes.data); @@ -101,10 +123,12 @@ export default function SettingsDirectoryView() { }, [get]); const dirty = useMemo(() => { - const persistedProvider = status?.provider_type ?? ''; - const persistedConn = status?.connection_name ?? ''; - return providerType !== persistedProvider || connectionName !== persistedConn; - }, [providerType, connectionName, status]); + if (providerType !== (status?.provider_type ?? '')) return true; + if (connectionName !== (status?.connection_name ?? '')) return true; + if (lakebaseTable !== (status?.lakebase_table ?? '')) return true; + if (filePath !== (status?.file_path ?? 
'')) return true; + return false; + }, [providerType, connectionName, lakebaseTable, filePath, status]); const canSave = !saving && dirty; const canTest = !!status?.configured && !testing && !dirty; @@ -115,13 +139,12 @@ export default function SettingsDirectoryView() { const body: DirectorySettingsUpdate = { provider_type: providerType || null, connection_name: connectionName || null, + lakebase_table: lakebaseTable || null, + file_path: filePath || null, }; const res = await put('/api/directory/settings', body); if (res.error) throw new Error(res.error); setStatus(res.data); - // Re-pull status into the shared store so existing pickers pick - // up the change without a full page reload, and clear the - // session-sticky degraded flag. await refreshStore(); toast({ title: 'Directory settings saved' }); } catch (err: any) { @@ -163,12 +186,19 @@ export default function SettingsDirectoryView() { const handleClear = async () => { setSaving(true); try { - const body: DirectorySettingsUpdate = { provider_type: null, connection_name: null }; + const body: DirectorySettingsUpdate = { + provider_type: null, + connection_name: null, + lakebase_table: null, + file_path: null, + }; const res = await put('/api/directory/settings', body); if (res.error) throw new Error(res.error); setStatus(res.data); setProviderType(''); setConnectionName(''); + setLakebaseTable(''); + setFilePath(''); await refreshStore(); toast({ title: 'Directory settings cleared' }); } catch (err: any) { @@ -192,13 +222,47 @@ export default function SettingsDirectoryView() { ); } + const providerPanel: ReactNode = (() => { + switch (providerType) { + case 'entra': + return ( + + ); + case 'lakebase': + return ( + + ); + case 'file': + return ( + + ); + default: + return null; + } + })(); + return (

- Connect an external identity provider so users and groups can be picked - from a live directory. All traffic flows through a Unity Catalog HTTP - Connection; the app never stores a client secret. + Connect a principal directory so users and groups can be picked + throughout the app. v1 supports Microsoft Entra ID (via a UC HTTP + Connection), a Postgres / Lakebase table, or a local CSV file for + tests and demos.

@@ -209,7 +273,7 @@ export default function SettingsDirectoryView() { {PROVIDER_OPTIONS.map((opt) => ( - + {opt.label} ))} @@ -217,64 +281,7 @@ export default function SettingsDirectoryView() {
-
- - -
- - {providerType === 'entra' && ( - - Entra ID connection setup - -

- Create a Unity Catalog HTTP connection against Microsoft Graph with - client_credentials. The app's enterprise app must hold at least - User.Read.All and - GroupMember.Read.All (or - Group.Read.All) application scopes. -

-
- {ENTRA_HELP_LINES.map(([k, v]) => ( -
-
{k}
-
- {v} -
-
- ))} -
-
-
- )} + {providerPanel}
@@ -308,3 +315,165 @@ export default function SettingsDirectoryView() { ); } + +// ----- per-provider panels ---------------------------------------------------- + +function EntraPanel({ + connectionName, + setConnectionName, + saving, + connections, + connectionsLoading, +}: { + connectionName: string; + setConnectionName: (v: string) => void; + saving: boolean; + connections: UcHttpConnection[]; + connectionsLoading: boolean; +}) { + return ( + <> +
+ + +
+ + Entra ID connection setup + +

+ Create a Unity Catalog HTTP connection against Microsoft Graph with + client_credentials. The app's enterprise app must hold at least + User.Read.All and + GroupMember.Read.All (or + Group.Read.All) application scopes. +

+
+ {ENTRA_HELP_LINES.map(([k, v]) => ( +
+
{k}
+
+ {v} +
+
+ ))} +
+
+
+ + ); +} + +function LakebasePanel({ + lakebaseTable, + setLakebaseTable, + saving, +}: { + lakebaseTable: string; + setLakebaseTable: (v: string) => void; + saving: boolean; +}) { + return ( + <> +
+ + setLakebaseTable(e.target.value)} + placeholder="catalog.schema.table" + disabled={saving} + /> +

+ Fully-qualified name of a Postgres table on the app's primary + Lakebase database. Identifier segments must contain only letters, + digits, and underscores. +

+
+ + Required schema + +

+ Populate this table from your IdP sync pipeline. Indexes on + lower-cased columns are optional but recommended for snappy + prefix search. +

+
+            {LAKEBASE_SCHEMA_SQL}
+          
+
+
+ + ); +} + +function FilePanel({ + filePath, + setFilePath, + saving, +}: { + filePath: string; + setFilePath: (v: string) => void; + saving: boolean; +}) { + return ( + <> +
+ + setFilePath(e.target.value)} + placeholder="/etc/ontos/principals.csv" + disabled={saving} + /> +

+ Absolute path to a CSV file readable by the app process. Re-read + automatically when the file's mtime advances; no restart needed. +

+
+ + CSV format + +

+ Required columns: type, id, + {' '}display_name. The sub_label column + is optional. type must be user or + {' '}group. +

+
+            {FILE_HELP_CSV}
+          
+
+
+ + ); +}