diff --git a/README.md b/README.md index fb2948f..3a2e46d 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ DataDesignerPlugins/ └── docs/ # Authoring guide, plugin catalog ``` -Each plugin is an independent Python package with its own `pyproject.toml`, tests, and CODEOWNERS. The root workspace auto-discovers plugins via `plugins/*`. +Each plugin is an independent Python package with its own `pyproject.toml`, tests, and CODEOWNERS. The root workspace auto-discovers plugin packages via `plugins/*`. A package can provide one or more Data Designer plugins through the `data_designer.plugins` group. ## Development diff --git a/devtools/ddp/src/ddp/catalog.py b/devtools/ddp/src/ddp/catalog.py index 11943ed..3cdbb76 100644 --- a/devtools/ddp/src/ddp/catalog.py +++ b/devtools/ddp/src/ddp/catalog.py @@ -1,20 +1,70 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -"""Generate a markdown plugin catalog from plugin metadata.""" +"""Generate a markdown plugin catalog from package metadata and plugin objects.""" from __future__ import annotations +import importlib.metadata +import re +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any + from ddp._repo import find_repo_root, load_toml +PLUGIN_ENTRY_POINT_GROUP = "data_designer.plugins" + + +class CatalogError(RuntimeError): + """Raised when a catalog entry cannot be generated.""" + + +@dataclass(frozen=True) +class CatalogRow: + """One rendered row in the plugin catalog. + + Attributes: + plugin_package: Python package name from ``[project].name``. + version: Package version from ``[project].version``. + name: Runtime DataDesigner plugin name. + plugin_type: Runtime DataDesigner plugin type value. + description: Package description from ``[project].description``. + """ + + plugin_package: str + version: str + name: str + plugin_type: str + description: str + def main() -> None: - """Generate a markdown table of all plugins and print to stdout.""" - repo_root = find_repo_root() - plugins_dir = repo_root / "plugins" + """Generate a markdown table of all plugin entry points and print to stdout.""" + try: + rows = discover_catalog_rows(find_repo_root() / "plugins") + except CatalogError as exc: + print(f"ERROR: {exc}", file=sys.stderr) + sys.exit(1) + + print(render_catalog(rows)) + + +def discover_catalog_rows(plugins_dir: Path) -> list[CatalogRow]: + """Discover catalog rows for local plugin packages. - rows: list[tuple[str, str, str, str]] = [] + Args: + plugins_dir: Repository ``plugins/`` directory. + Returns: + Rows sorted by package name, then runtime plugin name. + + Raises: + CatalogError: If a local entry point is not installed, cannot be loaded, + or does not load to a DataDesigner ``Plugin`` object. + """ + rows: list[CatalogRow] = [] for toml_path in sorted(plugins_dir.glob("*/pyproject.toml")): data = load_toml(toml_path) @@ -23,29 +73,177 @@ def main() -> None: version = project.get("version", "unknown") description = project.get("description", "") - entry_points = project.get("entry-points", {}).get("data_designer.plugins", {}) + entry_points = project.get("entry-points", {}).get(PLUGIN_ENTRY_POINT_GROUP, {}) + for entry_point_name in sorted(entry_points): + rows.append(catalog_row_for_entry_point(name, version, description, entry_point_name)) + + return sorted(rows, key=lambda row: (row.plugin_package, row.name)) + + +def catalog_row_for_entry_point( + package_name: str, + version: str, + description: str, + entry_point_name: str, +) -> CatalogRow: + """Build a catalog row from an installed DataDesigner plugin entry point. + + Args: + package_name: Local plugin package name. + version: Local plugin package version. + description: Local plugin package description. + entry_point_name: Entry point name in the ``data_designer.plugins`` group. + + Returns: + Catalog row with runtime plugin metadata. + + Raises: + CatalogError: If plugin metadata cannot be loaded or read. + """ + plugin = load_plugin_from_entry_point(package_name, entry_point_name) + try: + plugin_name = plugin.name + plugin_type = plugin.plugin_type.value + except Exception as exc: + raise CatalogError( + f"could not read runtime metadata for package {package_name!r} entry point {entry_point_name!r}: {exc}" + ) from exc + + if not isinstance(plugin_name, str) or not plugin_name: + raise CatalogError( + f"package {package_name!r} entry point {entry_point_name!r} has invalid plugin.name {plugin_name!r}" + ) + if not isinstance(plugin_type, str) or not plugin_type: + raise CatalogError( + f"package {package_name!r} entry point {entry_point_name!r} has invalid plugin.plugin_type.value " + f"{plugin_type!r}" + ) + + return CatalogRow( + plugin_package=package_name, + version=version, + name=plugin_name, + plugin_type=plugin_type, + description=description, + ) + + +def load_plugin_from_entry_point(package_name: str, entry_point_name: str) -> Any: + """Load and validate an installed DataDesigner plugin entry point. + + Args: + package_name: Local plugin package name. + entry_point_name: Entry point name in the ``data_designer.plugins`` group. + + Returns: + Loaded DataDesigner ``Plugin`` object. + + Raises: + CatalogError: If the entry point is missing, fails to load, or returns + a non-``Plugin`` object. + """ + try: + from data_designer.plugins.plugin import Plugin + except Exception as exc: + raise CatalogError( + f"could not import DataDesigner Plugin while loading package {package_name!r} " + f"entry point {entry_point_name!r}: {exc}" + ) from exc + + entry_point = find_installed_entry_point(package_name, entry_point_name) + try: + plugin = entry_point.load() + except Exception as exc: + raise CatalogError(f"could not load package {package_name!r} entry point {entry_point_name!r}: {exc}") from exc + + if not isinstance(plugin, Plugin): + raise CatalogError( + f"package {package_name!r} entry point {entry_point_name!r} loaded {type(plugin).__name__}, " + "expected data_designer.plugins.plugin.Plugin" + ) + return plugin + + +def find_installed_entry_point(package_name: str, entry_point_name: str) -> importlib.metadata.EntryPoint: + """Find an installed entry point owned by a local package. + + Args: + package_name: Local plugin package name. + entry_point_name: Entry point name in the ``data_designer.plugins`` group. + + Returns: + Matching installed entry point. + + Raises: + CatalogError: If no installed entry point matches the package and name. + """ + normalized_package_name = normalize_distribution_name(package_name) + for entry_point in importlib.metadata.entry_points(group=PLUGIN_ENTRY_POINT_GROUP): + distribution_name = entry_point_distribution_name(entry_point) + if distribution_name is None: + continue + if ( + normalize_distribution_name(distribution_name) == normalized_package_name + and entry_point.name == entry_point_name + ): + return entry_point + + raise CatalogError( + f"package {package_name!r} entry point {entry_point_name!r} is not installed; " + "run `make sync` before regenerating the catalog" + ) + + +def entry_point_distribution_name(entry_point: importlib.metadata.EntryPoint) -> str | None: + """Return the distribution name that owns an entry point. + + Args: + entry_point: Installed entry point. + + Returns: + Owning distribution name, or ``None`` if it cannot be determined. + """ + distribution = getattr(entry_point, "dist", None) + if distribution is None: + return None + return distribution.metadata.get("Name") + + +def normalize_distribution_name(name: str) -> str: + """Normalize a Python distribution name for comparison. + + Args: + name: Distribution name. + + Returns: + PEP 503-style normalized distribution name. + """ + return re.sub(r"[-_.]+", "-", name).lower() + - if entry_points: - for ep_key in sorted(entry_points): - rows.append((name, version, ep_key, description)) - else: - rows.append((name, version, "", description)) +def render_catalog(rows: list[CatalogRow]) -> str: + """Render catalog rows as a markdown table. - rows.sort(key=lambda r: (r[0], r[2])) + Args: + rows: Catalog rows to render. + Returns: + Markdown catalog content. + """ lines = [ "# Plugin Catalog", "", - "Auto-generated from plugin metadata. Do not edit manually.", + "Auto-generated from installed local DataDesigner plugins and package metadata. Do not edit manually.", "", - "| Plugin | Version | Column Type | Description |", - "|--------|---------|-------------|-------------|", + "| Plugin Package | Version | Name | Type | Description |", + "|----------------|---------|------|------|-------------|", ] - for name, version, column_type, description in rows: - ct = f"`{column_type}`" if column_type else "" - lines.append(f"| {name} | {version} | {ct} | {description} |") + for row in rows: + lines.append( + f"| {row.plugin_package} | {row.version} | `{row.name}` | `{row.plugin_type}` | {row.description} |" + ) - print("\n".join(lines)) + return "\n".join(lines) if __name__ == "__main__": diff --git a/devtools/ddp/src/ddp/cli.py b/devtools/ddp/src/ddp/cli.py index 8af3319..bb9d38c 100644 --- a/devtools/ddp/src/ddp/cli.py +++ b/devtools/ddp/src/ddp/cli.py @@ -47,8 +47,8 @@ def build_parser() -> argparse.ArgumentParser: "catalog", help="Generate plugin catalog to stdout", description=( - "Generate a markdown table of all plugins and their metadata " - "(name, version, column type, description) to stdout. " + "Generate a markdown table of local DataDesigner plugins and package metadata " + "(package, version, name, type, description) to stdout. " "Typically redirected to docs/catalog.md." ), ) diff --git a/devtools/ddp/tests/test_catalog.py b/devtools/ddp/tests/test_catalog.py index b49809d..3ed3bd5 100644 --- a/devtools/ddp/tests/test_catalog.py +++ b/devtools/ddp/tests/test_catalog.py @@ -6,23 +6,183 @@ from __future__ import annotations import io +import textwrap from contextlib import redirect_stdout +from pathlib import Path -from ddp.catalog import main +import pytest + +from ddp import catalog + + +class FakePluginType: + """Plugin type stand-in with the DataDesigner enum ``value`` interface.""" + + def __init__(self, value: str) -> None: + """Initialize the fake plugin type. + + Args: + value: Runtime plugin type value. + """ + self.value = value + + +class FakePlugin: + """Plugin stand-in with the runtime catalog metadata interface.""" + + def __init__(self, name: str, plugin_type: str) -> None: + """Initialize the fake plugin. + + Args: + name: Runtime plugin name. + plugin_type: Runtime plugin type value. + """ + self.name = name + self.plugin_type = FakePluginType(plugin_type) + + +class FakePluginLoader: + """Callable fake entry point loader for catalog row tests.""" + + def __init__(self, plugins: dict[str, FakePlugin]) -> None: + """Initialize the fake loader. + + Args: + plugins: Fake plugins keyed by entry point name. + """ + self.plugins = plugins + self.calls: list[tuple[str, str]] = [] + + def __call__(self, package_name: str, entry_point_name: str) -> FakePlugin: + """Load a fake plugin by entry point name. + + Args: + package_name: Local plugin package name. + entry_point_name: Entry point name in the ``data_designer.plugins`` group. + + Returns: + Fake plugin object. + """ + self.calls.append((package_name, entry_point_name)) + return self.plugins[entry_point_name] + + +def write_plugin_pyproject( + plugins_dir: Path, + package_name: str, + version: str, + description: str, + entry_points: dict[str, str], +) -> None: + """Write a minimal plugin pyproject for catalog tests. + + Args: + plugins_dir: Temporary ``plugins/`` directory. + package_name: Package name for ``[project].name``. + version: Package version for ``[project].version``. + description: Package description for ``[project].description``. + entry_points: Entry points for ``data_designer.plugins``. + """ + plugin_dir = plugins_dir / package_name + plugin_dir.mkdir(parents=True) + entry_point_lines = "\n".join(f'{name} = "{value}"' for name, value in entry_points.items()) + pyproject = textwrap.dedent( + f""" + [project] + name = "{package_name}" + version = "{version}" + description = "{description}" + + [project.entry-points."data_designer.plugins"] + {entry_point_lines} + """ + ).lstrip() + (plugin_dir / "pyproject.toml").write_text(pyproject, encoding="utf-8") def test_main_produces_markdown_table() -> None: buf = io.StringIO() with redirect_stdout(buf): - main() + catalog.main() output = buf.getvalue() assert "# Plugin Catalog" in output - assert "| Plugin | Version | Column Type | Description |" in output + assert "| Plugin Package | Version | Name | Type | Description |" in output + + +def test_discover_catalog_rows_uses_entry_point_runtime_metadata(monkeypatch, tmp_path: Path) -> None: + plugins_dir = tmp_path / "plugins" + write_plugin_pyproject( + plugins_dir=plugins_dir, + package_name="data-designer-multi", + version="1.2.3", + description="Package-level description", + entry_points={ + "z-entry": "example.plugin:z_plugin", + "a-entry": "example.plugin:a_plugin", + }, + ) + loader = FakePluginLoader( + { + "z-entry": FakePlugin(name="z-runtime-name", plugin_type="processor"), + "a-entry": FakePlugin(name="a-runtime-name", plugin_type="seed-reader"), + } + ) + monkeypatch.setattr(catalog, "load_plugin_from_entry_point", loader) + + rows = catalog.discover_catalog_rows(plugins_dir) + + assert loader.calls == [ + ("data-designer-multi", "a-entry"), + ("data-designer-multi", "z-entry"), + ] + assert rows == [ + catalog.CatalogRow( + plugin_package="data-designer-multi", + version="1.2.3", + name="a-runtime-name", + plugin_type="seed-reader", + description="Package-level description", + ), + catalog.CatalogRow( + plugin_package="data-designer-multi", + version="1.2.3", + name="z-runtime-name", + plugin_type="processor", + description="Package-level description", + ), + ] + + +def test_render_catalog_outputs_plugin_entry_point_rows() -> None: + output = catalog.render_catalog( + [ + catalog.CatalogRow( + plugin_package="data-designer-example", + version="0.2.0", + name="runtime-name", + plugin_type="column-generator", + description="Package description", + ) + ] + ) + + assert "| data-designer-example | 0.2.0 | `runtime-name` | `column-generator` | Package description |" in output + + +def test_missing_installed_entry_point_error_names_package_and_entry_point() -> None: + with pytest.raises(catalog.CatalogError) as exc_info: + catalog.find_installed_entry_point("data-designer-ddp-test-missing", "missing-entry") + + message = str(exc_info.value) + assert "data-designer-ddp-test-missing" in message + assert "missing-entry" in message def test_main_includes_template_plugin() -> None: buf = io.StringIO() with redirect_stdout(buf): - main() + catalog.main() output = buf.getvalue() assert "data-designer-template" in output + assert "`text-transform`" in output + assert "`column-generator`" in output diff --git a/docs/adding-a-plugin.md b/docs/adding-a-plugin.md index 241fcb0..c6fef4d 100644 --- a/docs/adding-a-plugin.md +++ b/docs/adding-a-plugin.md @@ -63,11 +63,12 @@ def test_valid_plugin(): ## 4. Regenerate Metadata ```bash +make sync uv run ddp catalog > docs/catalog.md uv run ddp codeowners > .github/CODEOWNERS ``` -CI will reject your PR if these are stale. +`ddp catalog` reads the installed local `data_designer.plugins` registrations, so run `make sync` after adding or changing plugin registrations. CI will reject your PR if generated metadata is stale. ## 5. Submit @@ -190,9 +191,11 @@ Pre-release versions won't be installed by default with `pip install`. Users mus ## Entry Point Discovery -Plugins register via `[project.entry-points."data_designer.plugins"]` in `pyproject.toml`. The key is your column type slug; the value points to the `Plugin` instance. Data Designer discovers all installed plugins automatically through this mechanism. +Plugins register via `[project.entry-points."data_designer.plugins"]` in `pyproject.toml`. The key is the Python entry point name; the value points to the `Plugin` instance. Data Designer discovers all installed plugins automatically through this mechanism. ```toml [project.entry-points."data_designer.plugins"] my-plugin = "data_designer_my_plugin.plugin:plugin" ``` + +`docs/catalog.md` lists each installed local Data Designer plugin. Packages that register multiple plugins appear multiple times. The catalog `Name` column comes from the runtime `Plugin.name`, which Data Designer derives from the plugin config discriminator. The `Type` column comes from `Plugin.plugin_type.value`. The `Description` column remains the package-level `[project].description` until Data Designer exposes a plugin-level description field or this repo adds explicit per-plugin catalog metadata. diff --git a/docs/catalog.md b/docs/catalog.md index d3c1211..885369e 100644 --- a/docs/catalog.md +++ b/docs/catalog.md @@ -1,7 +1,7 @@ # Plugin Catalog -Auto-generated from plugin metadata. Do not edit manually. +Auto-generated from installed local DataDesigner plugins and package metadata. Do not edit manually. -| Plugin | Version | Column Type | Description | -|--------|---------|-------------|-------------| -| data-designer-template | 0.1.0 | `text-transform` | Template Data Designer plugin — text transform column generator | +| Plugin Package | Version | Name | Type | Description | +|----------------|---------|------|------|-------------| +| data-designer-template | 0.1.0 | `text-transform` | `column-generator` | Template Data Designer plugin — text transform column generator |