# Execution modes understood by ``run-recipe``. The CLI option's choice list and
# the runtime dispatch both derive from this tuple so they cannot drift apart.
_RECIPE_MODES = ("create", "preview", "validate")


# NOTE: the docstrings of the three command functions below are kept as
# one-line summaries on purpose: Typer normally surfaces a command's docstring
# as its ``--help`` text, so they are user-facing strings.
def list_command() -> None:
    """List installed Data Designer recipes."""
    # Any registry failure is reported through the shared CLI error printer and
    # converted into exit code 1 instead of a traceback.
    try:
        recipes = RecipeRegistry().list_recipes()
    except RecipeLoadError as exc:
        print_error(str(exc))
        raise typer.Exit(code=1) from exc

    print_header("Installed Data Designer Recipes")
    if not recipes:
        console.print(" No recipes found.")
        return

    # One line per recipe: entry point name in bold, then its description.
    for item in recipes:
        console.print(f" [bold]{item.entry_point_name}[/bold] — {item.recipe.description}")


def show_command(
    recipe_name: str = typer.Argument(..., help="Installed recipe name."),
) -> None:
    """Show a recipe's metadata and config schema."""
    # Lookup failures (unknown name, broken entry point) exit with code 1.
    try:
        recipe = RecipeRegistry().get_recipe(recipe_name)
    except RecipeLoadError as exc:
        print_error(str(exc))
        raise typer.Exit(code=1) from exc

    print_header(f"Recipe: {recipe.name}")
    console.print(f" Description: {recipe.description}")
    console.print(" Config schema:")
    # The schema is generated from the recipe's Pydantic config model.
    console.print_json(data=recipe.config_model.model_json_schema())


def run_recipe_command(
    recipe_name: str = typer.Argument(..., help="Installed recipe name."),
    config_path: Path | None = typer.Option(
        None,
        "--config",
        "-c",
        help="YAML/JSON recipe configuration file. Omit for recipes with an empty config model.",
    ),
    mode: str = typer.Option(
        "create",
        "--mode",
        click_type=click.Choice(list(_RECIPE_MODES), case_sensitive=False),
        help="Execution mode.",
    ),
    num_records: int = typer.Option(
        DEFAULT_NUM_RECORDS,
        "--num-records",
        "-n",
        help="Number of records to generate.",
        min=1,
    ),
    dataset_name: str = typer.Option(
        "dataset",
        "--dataset-name",
        "-d",
        help="Name for the generated dataset folder when --mode=create.",
    ),
    artifact_path: Path | None = typer.Option(
        None,
        "--artifact-path",
        "-o",
        help="Path where generated artifacts will be stored. Defaults to ./artifacts.",
    ),
) -> None:
    """Run an installed Data Designer recipe."""
    # Flow: resolve the recipe, validate its config, build the config builder,
    # then dispatch on ``mode``. Every failure path prints an error and exits
    # with code 1.

    # Normalise before dispatching: the option is declared case-insensitive, and
    # a direct (non-CLI) caller bypasses Click's choice validation entirely.
    # Without this guard any unrecognised value would silently fall through to
    # the ``create`` branch below.
    mode = mode.lower()
    if mode not in _RECIPE_MODES:
        print_error(f"Unknown mode {mode!r}. Expected one of: {', '.join(_RECIPE_MODES)}.")
        raise typer.Exit(code=1)

    try:
        recipe = RecipeRegistry().get_recipe(recipe_name)
        recipe_config = _load_recipe_config(recipe, config_path)
        config_builder = recipe.build_config(recipe_config)
    except RecipeLoadError as exc:
        # Lookup and config-validation problems already carry a complete message.
        print_error(str(exc))
        raise typer.Exit(code=1) from exc
    except Exception as exc:
        # ``build_config`` is recipe-package code; report whatever it raises.
        print_error(f"Failed to build recipe {recipe_name!r}: {exc}")
        raise typer.Exit(code=1) from exc

    # Explicit ``None`` check: only an omitted option selects the default.
    if artifact_path is not None:
        resolved_artifact_path = artifact_path
    else:
        resolved_artifact_path = Path.cwd() / "artifacts"

    print_header("Data Designer Recipe")
    console.print(f" Recipe: [bold]{recipe.name}[/bold]")
    console.print(f" Mode: [bold]{mode}[/bold]")
    console.print(f" Records: [bold]{num_records}[/bold]")
    console.print(f" Artifact path: [bold]{resolved_artifact_path}[/bold]")
    if config_path is not None:
        console.print(f" Config: [bold]{config_path}[/bold]")
    console.print()

    data_designer = DataDesigner(artifact_path=resolved_artifact_path)
    try:
        if mode == "validate":
            data_designer.validate(config_builder)
            print_success("Recipe configuration is valid")
            return

        if mode == "preview":
            results = data_designer.preview(config_builder, num_records=num_records)
            results.display_sample_record(index=0)
            print_success(f"Recipe preview complete — {len(results.dataset)} record(s) generated")
            return

        # mode == "create": the only branch that uses ``dataset_name`` and the
        # only one that runs the recipe's optional ``postprocess`` hook.
        results = data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name)
        if recipe.postprocess is not None:
            recipe.postprocess(results, recipe_config)
        print_success(f"Recipe create complete — {len(results.load_dataset())} record(s) generated")
    except InvalidConfigError as exc:
        print_error(f"Recipe configuration is invalid: {exc}")
        raise typer.Exit(code=1) from exc
    except Exception as exc:
        print_error(f"Recipe execution failed: {exc}")
        raise typer.Exit(code=1) from exc


def _load_recipe_config(recipe: DataDesignerRecipe, config_path: Path | None) -> BaseModel:
    """Load a recipe config file and validate it against the recipe's config model.

    Args:
        recipe: Recipe whose ``config_model`` validates the loaded data.
        config_path: Path handed to ``smart_load_yaml``. When ``None`` an empty
            mapping is validated instead, so only config models whose fields
            all have defaults succeed without a file.

    Returns:
        An instance of ``recipe.config_model`` built from the loaded mapping.

    Raises:
        RecipeLoadError: If the file cannot be loaded, if its top-level value
            is not a mapping, or if model validation fails.
    """
    raw_config = {}
    if config_path is not None:
        try:
            raw_config = smart_load_yaml(config_path)
        except Exception as exc:
            raise RecipeLoadError(f"Failed to load recipe config {config_path}: {exc}") from exc

    # A loader result of ``None`` (for example an empty document) is treated as
    # "no settings" rather than rejected as a non-mapping.
    if raw_config is None:
        raw_config = {}
    if not isinstance(raw_config, dict):
        raise RecipeLoadError(f"Recipe config for {recipe.name!r} must be a mapping, got {type(raw_config).__name__}.")

    try:
        return recipe.config_model.model_validate(raw_config)
    except ValidationError as exc:
        raise RecipeLoadError(f"Invalid config for recipe {recipe.name!r}: {exc}") from exc
"rich_help_panel": "Generation", }, + "run-recipe": { + "module": f"{_CMD}.recipes", + "attr": "run_recipe_command", + "help": "Run an installed Data Designer recipe", + "rich_help_panel": "Recipes", + }, } ), add_completion=False, @@ -120,6 +126,26 @@ def _is_version_request(args: list[str]) -> bool: no_args_is_help=True, ) +recipes_app = typer.Typer( + name="recipes", + help="Discover installed Data Designer recipes", + cls=create_lazy_typer_group( + { + "list": { + "module": f"{_CMD}.recipes", + "attr": "list_command", + "help": "List installed recipes", + }, + "show": { + "module": f"{_CMD}.recipes", + "attr": "show_command", + "help": "Show recipe metadata and config schema", + }, + } + ), + no_args_is_help=True, +) + _AGENT_CMD = f"{_CMD}.agent" @@ -150,6 +176,7 @@ def _build_agent_lazy_group(prefix: str) -> dict[str, dict[str, str]]: # Add setup command groups app.add_typer(config_app, name="config", rich_help_panel="Setup") app.add_typer(download_app, name="download", rich_help_panel="Setup") +app.add_typer(recipes_app, name="recipes", rich_help_panel="Recipes") app.add_typer(agent_app, name="agent", rich_help_panel="Agent") diff --git a/packages/data-designer/src/data_designer/recipes/__init__.py b/packages/data-designer/src/data_designer/recipes/__init__.py new file mode 100644 index 000000000..12eaa3f9f --- /dev/null +++ b/packages/data-designer/src/data_designer/recipes/__init__.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
@dataclass(frozen=True)
class DataDesignerRecipe:
    """A reusable Data Designer pipeline composition.

    Recipe packages register instances of this class through the
    ``data_designer.recipes`` entry point group. The Data Designer CLI owns the
    generic execution flow; recipe packages own config validation and the
    construction of a :class:`DataDesignerConfigBuilder`.

    Attributes:
        name: Stable recipe name used by ``data-designer run-recipe``.
        description: Human-readable summary shown by ``data-designer recipes``.
        config_model: Pydantic model class used to validate recipe config files.
        build_config: Callable that converts a validated recipe config into a
            :class:`DataDesignerConfigBuilder`.
        postprocess: Optional callback invoked after ``create`` runs. This is
            intended for exports or recipe-specific artifacts, not for adding
            generation columns.
    """

    name: str
    description: str
    config_model: type[BaseModel]
    build_config: Callable[[BaseModel], DataDesignerConfigBuilder]
    postprocess: Callable[[Any, BaseModel], None] | None = None


# Entry point group that recipe packages register their recipes under.
RECIPE_ENTRY_POINT_GROUP = "data_designer.recipes"


class RecipeLoadError(Exception):
    """Raised when a Data Designer recipe entry point cannot be loaded."""


@dataclass(frozen=True)
class RecipeInfo:
    """Metadata for an installed recipe."""

    # Name the recipe was registered under in the entry point group.
    entry_point_name: str
    # The loaded recipe object itself.
    recipe: DataDesignerRecipe


class RecipeRegistry:
    """Discover and load Data Designer recipes from Python entry points."""

    def list_recipes(self) -> list[RecipeInfo]:
        """Return all installed recipes sorted by entry point name.

        Raises:
            RecipeLoadError: If any entry point fails to load or does not
                resolve to a :class:`DataDesignerRecipe`.
        """
        return [
            RecipeInfo(entry_point_name=entry_point.name, recipe=self._load_entry_point(entry_point))
            for entry_point in self._entry_points()
        ]

    def get_recipe(self, recipe_name: str) -> DataDesignerRecipe:
        """Load a recipe by entry point name or recipe ``name``.

        Entry point names are checked first, which needs no imports, so a
        direct match loads only the requested recipe. Only when no entry point
        carries that name are the entry points loaded one by one to compare
        ``DataDesignerRecipe.name``. Entry points that fail to load during
        that scan are skipped, so one broken recipe package cannot make every
        other recipe unreachable.

        Args:
            recipe_name: Entry point name or ``DataDesignerRecipe.name``.

        Returns:
            The requested recipe.

        Raises:
            RecipeLoadError: If no matching recipe is installed (the message
                also lists entry points that failed to load), or if the entry
                point named ``recipe_name`` itself cannot be loaded.
        """
        entry_points = self._entry_points()

        # Fast path: a name match needs no other package to be imported.
        for entry_point in entry_points:
            if entry_point.name == recipe_name:
                return self._load_entry_point(entry_point)

        # Slow path: ``recipe.name`` is only known after loading the object.
        load_failures: list[str] = []
        for entry_point in entry_points:
            try:
                recipe = self._load_entry_point(entry_point)
            except RecipeLoadError as exc:
                load_failures.append(str(exc))
                continue
            if recipe.name == recipe_name:
                return recipe

        message = f"No installed Data Designer recipe named {recipe_name!r}."
        if load_failures:
            # A skipped entry point may be the one the caller meant, so say so.
            message += " Some recipe entry points could not be loaded: " + " ".join(load_failures)
        raise RecipeLoadError(message)

    def _entry_points(self) -> list[importlib.metadata.EntryPoint]:
        """Return the registered recipe entry points sorted by name."""
        discovered = importlib.metadata.entry_points(group=RECIPE_ENTRY_POINT_GROUP)
        return sorted(discovered, key=lambda entry_point: entry_point.name)

    def _load_entry_point(self, entry_point: importlib.metadata.EntryPoint) -> DataDesignerRecipe:
        """Load one entry point and check that it yields a recipe.

        Raises:
            RecipeLoadError: If loading raises, or if the loaded object is not
                a :class:`DataDesignerRecipe` instance.
        """
        try:
            loaded = entry_point.load()
        except Exception as exc:
            raise RecipeLoadError(f"Failed to load recipe entry point {entry_point.name!r}: {exc}") from exc

        if not isinstance(loaded, DataDesignerRecipe):
            raise RecipeLoadError(
                f"Recipe entry point {entry_point.name!r} returned {type(loaded).__name__}, "
                "expected DataDesignerRecipe."
            )
        return loaded
class DemoRecipeConfig(BaseModel):
    """Demo recipe config."""

    value: str = "default"


def test_run_recipe_command_builds_and_creates_recipe() -> None:
    """``create`` mode builds the recipe config, runs ``create`` and post-processes.

    The registry and ``DataDesigner`` are patched in the command module, so no
    entry points are read and nothing is generated.
    """
    mock_config_builder = MagicMock()
    build_config = MagicMock(return_value=mock_config_builder)
    postprocess = MagicMock()
    recipe = DataDesignerRecipe(
        name="demo",
        description="Demo recipe",
        config_model=DemoRecipeConfig,
        build_config=build_config,
        postprocess=postprocess,
    )
    mock_results = MagicMock()
    mock_results.load_dataset.return_value = [object(), object()]
    # With no config file the command validates an empty mapping, which yields
    # the model's defaults. Pydantic models compare by field values.
    expected_config = DemoRecipeConfig()

    with (
        patch("data_designer.cli.commands.recipes.RecipeRegistry") as mock_registry_cls,
        patch("data_designer.cli.commands.recipes.DataDesigner") as mock_data_designer_cls,
    ):
        mock_registry_cls.return_value.get_recipe.return_value = recipe
        mock_data_designer_cls.return_value.create.return_value = mock_results

        run_recipe_command(
            recipe_name="demo",
            config_path=None,
            mode="create",
            num_records=2,
            dataset_name="demo-dataset",
            artifact_path=Path("/tmp/artifacts"),
        )

    mock_registry_cls.return_value.get_recipe.assert_called_once_with("demo")
    # Pin the arguments, not just the call count: a bare ``assert_called_once()``
    # would still pass if the command handed over the wrong object.
    build_config.assert_called_once_with(expected_config)
    mock_data_designer_cls.assert_called_once_with(artifact_path=Path("/tmp/artifacts"))
    mock_data_designer_cls.return_value.create.assert_called_once_with(
        mock_config_builder,
        num_records=2,
        dataset_name="demo-dataset",
    )
    postprocess.assert_called_once_with(mock_results, expected_config)
class DemoRecipeConfig(BaseModel):
    """Demo recipe config."""


@patch("data_designer.cli.commands.recipes.DataDesigner")
@patch("data_designer.cli.commands.recipes.RecipeRegistry")
def test_app_dispatches_lazy_run_recipe_command(mock_registry_cls: Mock, mock_data_designer_cls: Mock) -> None:
    """The Typer app dispatches the recipe command through the lazy command loader."""
    # Handles on the instances the command will create from the patched classes.
    registry = mock_registry_cls.return_value
    designer = mock_data_designer_cls.return_value

    builder = Mock()
    registry.get_recipe.return_value = DataDesignerRecipe(
        name="demo",
        description="Demo recipe",
        config_model=DemoRecipeConfig,
        build_config=Mock(return_value=builder),
    )

    # ``create`` returns a results object whose dataset holds a single record.
    created = Mock()
    created.load_dataset.return_value = [object()]
    designer.create.return_value = created

    result = runner.invoke(app, ["run-recipe", "demo", "--num-records", "1"])

    assert result.exit_code == 0
    registry.get_recipe.assert_called_once_with("demo")
    # ``dataset_name`` falls back to the option default because it was not passed.
    designer.create.assert_called_once_with(
        builder,
        num_records=1,
        dataset_name="dataset",
    )
class EmptyRecipeConfig(BaseModel):
    """Empty recipe config for registry tests."""


class FakeEntryPoint:
    """Minimal entry point stub."""

    def __init__(self, name: str, loaded: Any):
        # ``name`` is read by the registry; the payload is what ``load`` returns.
        self.name = name
        self._payload = loaded

    def load(self) -> Any:
        """Return the object this stub was constructed with."""
        return self._payload


def _recipe(name: str = "demo") -> DataDesignerRecipe:
    """Build a throwaway recipe whose ``build_config`` echoes its argument."""
    return DataDesignerRecipe(
        name=name,
        description="Demo recipe",
        config_model=EmptyRecipeConfig,
        build_config=lambda config: config,  # type: ignore[arg-type, return-value]
    )


def test_list_recipes_loads_recipe_entry_points() -> None:
    """A single registered entry point is listed with its name and recipe object."""
    expected = _recipe()
    stub = FakeEntryPoint("demo-entry-point", expected)

    with patch("data_designer.recipes.registry.importlib.metadata.entry_points", return_value=[stub]):
        listed = RecipeRegistry().list_recipes()

    assert len(listed) == 1
    only = listed[0]
    assert only.entry_point_name == "demo-entry-point"
    assert only.recipe is expected


def test_get_recipe_matches_entry_point_or_recipe_name() -> None:
    """Both the entry point name and the recipe's own name resolve the recipe."""
    expected = _recipe(name="demo-recipe")
    stub = FakeEntryPoint("demo-entry-point", expected)

    with patch("data_designer.recipes.registry.importlib.metadata.entry_points", return_value=[stub]):
        registry = RecipeRegistry()
        assert registry.get_recipe("demo-entry-point") is expected
        assert registry.get_recipe("demo-recipe") is expected


def test_list_recipes_rejects_non_recipe_entry_points() -> None:
    """An entry point that yields a non-recipe object is reported as a load error."""
    stub = FakeEntryPoint("bad", object())

    with (
        patch("data_designer.recipes.registry.importlib.metadata.entry_points", return_value=[stub]),
        pytest.raises(RecipeLoadError, match="expected DataDesignerRecipe"),
    ):
        RecipeRegistry().list_recipes()