Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions packages/data-designer/src/data_designer/cli/commands/recipes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from pathlib import Path

import click
import typer
from pydantic import BaseModel, ValidationError

from data_designer.cli.ui import console, print_error, print_header, print_success
from data_designer.config.errors import InvalidConfigError
from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
from data_designer.config.utils.io_helpers import smart_load_yaml
from data_designer.interface import DataDesigner
from data_designer.recipes.recipe import DataDesignerRecipe
from data_designer.recipes.registry import RecipeLoadError, RecipeRegistry


def list_command() -> None:
    """List installed Data Designer recipes."""
    # The docstring is left as-is: Typer may surface it as CLI help text.
    #
    # Prints one line per recipe (entry point name plus description) under a
    # header. A RecipeLoadError raised during discovery is reported via
    # ``print_error`` and turned into exit code 1.
    try:
        installed = RecipeRegistry().list_recipes()
    except RecipeLoadError as exc:
        print_error(str(exc))
        raise typer.Exit(code=1) from exc

    print_header("Installed Data Designer Recipes")
    if installed:
        for info in installed:
            console.print(f" [bold]{info.entry_point_name}[/bold] — {info.recipe.description}")
    else:
        console.print(" No recipes found.")


def show_command(
    recipe_name: str = typer.Argument(..., help="Installed recipe name."),
) -> None:
    """Show a recipe's metadata and config schema."""
    # The docstring is left as-is: Typer may surface it as CLI help text.
    #
    # Looks the recipe up through the registry, then prints its name, its
    # description and the JSON schema of its pydantic config model. A
    # RecipeLoadError during lookup is reported and becomes exit code 1.
    registry = RecipeRegistry()
    try:
        selected = registry.get_recipe(recipe_name)
    except RecipeLoadError as exc:
        print_error(str(exc))
        raise typer.Exit(code=1) from exc

    print_header(f"Recipe: {selected.name}")
    console.print(f" Description: {selected.description}")
    console.print(" Config schema:")
    # Schema is computed only after the preceding lines were printed, as before.
    schema = selected.config_model.model_json_schema()
    console.print_json(data=schema)


def run_recipe_command(
    recipe_name: str = typer.Argument(..., help="Installed recipe name."),
    config_path: Path | None = typer.Option(
        None,
        "--config",
        "-c",
        help="YAML/JSON recipe configuration file. Omit for recipes with an empty config model.",
    ),
    mode: str = typer.Option(
        "create",
        "--mode",
        click_type=click.Choice(["create", "preview", "validate"], case_sensitive=False),
        help="Execution mode.",
    ),
    num_records: int = typer.Option(
        DEFAULT_NUM_RECORDS,
        "--num-records",
        "-n",
        help="Number of records to generate.",
        min=1,
    ),
    dataset_name: str = typer.Option(
        "dataset",
        "--dataset-name",
        "-d",
        help="Name for the generated dataset folder when --mode=create.",
    ),
    artifact_path: Path | None = typer.Option(
        None,
        "--artifact-path",
        "-o",
        help="Path where generated artifacts will be stored. Defaults to ./artifacts.",
    ),
) -> None:
    """Run an installed Data Designer recipe."""
    # The docstring is kept to one line on purpose: Typer may surface it as CLI
    # help text, so the longer notes live in comments.
    #
    # Flow:
    #   1. Resolve the recipe, load and validate its config, build the config builder.
    #   2. Print a short run summary.
    #   3. Dispatch on ``mode``:
    #        "validate" -> validate the built config only
    #        "preview"  -> generate a preview and display the first record
    #        otherwise  -> create the dataset, then call ``recipe.postprocess`` if set
    #
    # Every failure is reported through ``print_error`` and ends the command
    # with ``typer.Exit(code=1)``.
    try:
        recipe = RecipeRegistry().get_recipe(recipe_name)
        recipe_config = _load_recipe_config(recipe, config_path)
        config_builder = recipe.build_config(recipe_config)
    except RecipeLoadError as exc:
        # Registry and config-loading errors already carry a user-facing message.
        print_error(str(exc))
        raise typer.Exit(code=1) from exc
    except Exception as exc:
        # Anything else comes from recipe code we do not control.
        print_error(f"Failed to build recipe {recipe_name!r}: {exc}")
        raise typer.Exit(code=1) from exc

    resolved_artifact_path = artifact_path or Path.cwd() / "artifacts"

    print_header("Data Designer Recipe")
    console.print(f" Recipe: [bold]{recipe.name}[/bold]")
    console.print(f" Mode: [bold]{mode}[/bold]")
    console.print(f" Records: [bold]{num_records}[/bold]")
    console.print(f" Artifact path: [bold]{resolved_artifact_path}[/bold]")
    if config_path is not None:
        console.print(f" Config: [bold]{config_path}[/bold]")
    console.print()

    try:
        # Constructed inside the guarded region so that a failure while setting
        # up DataDesigner is reported like any other execution error instead of
        # escaping as a raw traceback.
        data_designer = DataDesigner(artifact_path=resolved_artifact_path)

        if mode == "validate":
            data_designer.validate(config_builder)
            print_success("Recipe configuration is valid")
            return

        if mode == "preview":
            results = data_designer.preview(config_builder, num_records=num_records)
            results.display_sample_record(index=0)
            print_success(f"Recipe preview complete — {len(results.dataset)} record(s) generated")
            return

        results = data_designer.create(config_builder, num_records=num_records, dataset_name=dataset_name)
        # Postprocess only runs for "create"; preview and validate return above.
        if recipe.postprocess is not None:
            recipe.postprocess(results, recipe_config)
        print_success(f"Recipe create complete — {len(results.load_dataset())} record(s) generated")
    except InvalidConfigError as exc:
        print_error(f"Recipe configuration is invalid: {exc}")
        raise typer.Exit(code=1) from exc
    except Exception as exc:
        print_error(f"Recipe execution failed: {exc}")
        raise typer.Exit(code=1) from exc


def _load_recipe_config(recipe: DataDesignerRecipe, config_path: Path | None) -> BaseModel:
    """Read an optional recipe config file and validate it against the recipe's model.

    Args:
        recipe: Recipe whose ``config_model`` validates the loaded mapping.
        config_path: File to load, or ``None`` to validate an empty mapping.

    Returns:
        The validated config model instance.

    Raises:
        RecipeLoadError: If the file cannot be loaded, its top-level value is
            not a mapping, or validation against ``config_model`` fails.
    """
    if config_path is None:
        payload = {}
    else:
        try:
            payload = smart_load_yaml(config_path)
        except Exception as exc:
            raise RecipeLoadError(f"Failed to load recipe config {config_path}: {exc}") from exc
        if payload is None:
            # A loader result of None is treated the same as "no config given".
            payload = {}

    if not isinstance(payload, dict):
        raise RecipeLoadError(f"Recipe config for {recipe.name!r} must be a mapping, got {type(payload).__name__}.")

    try:
        validated = recipe.config_model.model_validate(payload)
    except ValidationError as exc:
        raise RecipeLoadError(f"Invalid config for recipe {recipe.name!r}: {exc}") from exc
    else:
        return validated
27 changes: 27 additions & 0 deletions packages/data-designer/src/data_designer/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ def _is_version_request(args: list[str]) -> bool:
"help": "Validate a Data Designer configuration",
"rich_help_panel": "Generation",
},
"run-recipe": {
"module": f"{_CMD}.recipes",
"attr": "run_recipe_command",
"help": "Run an installed Data Designer recipe",
"rich_help_panel": "Recipes",
},
}
),
add_completion=False,
Expand Down Expand Up @@ -120,6 +126,26 @@ def _is_version_request(args: list[str]) -> bool:
no_args_is_help=True,
)

# ``recipes`` sub-command group with two commands, ``list`` and ``show``. Each
# command is declared by module path and attribute name and handed to
# ``create_lazy_typer_group`` (presumably so the command module is only
# imported when the command is invoked; confirm in that helper).
recipes_app = typer.Typer(
    name="recipes",
    help="Discover installed Data Designer recipes",
    cls=create_lazy_typer_group(
        {
            "list": {
                "module": f"{_CMD}.recipes",
                "attr": "list_command",
                "help": "List installed recipes",
            },
            "show": {
                "module": f"{_CMD}.recipes",
                "attr": "show_command",
                "help": "Show recipe metadata and config schema",
            },
        }
    ),
    no_args_is_help=True,
)

_AGENT_CMD = f"{_CMD}.agent"


Expand Down Expand Up @@ -150,6 +176,7 @@ def _build_agent_lazy_group(prefix: str) -> dict[str, dict[str, str]]:
# Add setup command groups
app.add_typer(config_app, name="config", rich_help_panel="Setup")
app.add_typer(download_app, name="download", rich_help_panel="Setup")
app.add_typer(recipes_app, name="recipes", rich_help_panel="Recipes")
app.add_typer(agent_app, name="agent", rich_help_panel="Agent")


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from data_designer.recipes.recipe import DataDesignerRecipe
from data_designer.recipes.registry import RecipeRegistry

__all__ = ["DataDesignerRecipe", "RecipeRegistry"]
39 changes: 39 additions & 0 deletions packages/data-designer/src/data_designer/recipes/recipe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from collections.abc import Callable
from dataclasses import dataclass
from typing import Any

from pydantic import BaseModel

from data_designer.config.config_builder import DataDesignerConfigBuilder


@dataclass(frozen=True)
class DataDesignerRecipe:
    """Immutable description of a packaged, reusable Data Designer pipeline.

    Instances are published by recipe packages under the
    ``data_designer.recipes`` entry point group. The recipe package is
    responsible for validating its own config and for turning that config into
    a :class:`DataDesignerConfigBuilder`; the Data Designer CLI drives the
    generic execution around it.

    Attributes:
        name: Stable identifier accepted by ``data-designer run-recipe``.
        description: Short human-readable summary listed by
            ``data-designer recipes``.
        config_model: Pydantic model class that recipe config files are
            validated against.
        build_config: Function mapping a validated config instance to a
            :class:`DataDesignerConfigBuilder`.
        postprocess: Optional hook called after a ``create`` run with the
            results and the validated config. Meant for exports and other
            recipe-specific artifacts, not for adding generation columns.
    """

    # Identity and presentation.
    name: str
    description: str
    # Config validation and pipeline construction.
    config_model: type[BaseModel]
    build_config: Callable[[BaseModel], DataDesignerConfigBuilder]
    # Optional post-``create`` hook; ``None`` means no postprocessing.
    postprocess: Callable[[Any, BaseModel], None] | None = None
65 changes: 65 additions & 0 deletions packages/data-designer/src/data_designer/recipes/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import importlib.metadata
from dataclasses import dataclass

from data_designer.recipes.recipe import DataDesignerRecipe

RECIPE_ENTRY_POINT_GROUP = "data_designer.recipes"


class RecipeLoadError(Exception):
    """Error signalling that a Data Designer recipe could not be found or loaded."""


@dataclass(frozen=True)
class RecipeInfo:
    """Pairing of an installed recipe with the entry point that provided it."""

    # Name of the entry point in the recipes entry point group.
    entry_point_name: str
    # The recipe object the entry point resolved to.
    recipe: DataDesignerRecipe


class RecipeRegistry:
    """Discover and load Data Designer recipes from Python entry points."""

    def list_recipes(self) -> list[RecipeInfo]:
        """Return all installed recipes sorted by entry point name.

        Every entry point in the recipes group is loaded eagerly.

        Raises:
            RecipeLoadError: If any entry point fails to load or does not
                resolve to a ``DataDesignerRecipe``.
        """
        recipes = [
            RecipeInfo(entry_point_name=entry_point.name, recipe=self._load_entry_point(entry_point))
            for entry_point in importlib.metadata.entry_points(group=RECIPE_ENTRY_POINT_GROUP)
        ]
        return sorted(recipes, key=lambda item: item.entry_point_name)

    def get_recipe(self, recipe_name: str) -> DataDesignerRecipe:
        """Load a recipe by entry point name or recipe ``name``.

        Entry points are visited in entry-point-name order and loaded one at a
        time, so the lookup stops at the first match. An entry point that
        fails to load is skipped unless it is the one being requested by entry
        point name; this keeps one broken recipe package from making every
        other installed recipe unusable.

        Args:
            recipe_name: Entry point name or ``DataDesignerRecipe.name``.

        Returns:
            The requested recipe.

        Raises:
            RecipeLoadError: If no matching recipe is installed, or if the
                entry point whose name equals ``recipe_name`` cannot be loaded.
        """
        candidates = sorted(
            importlib.metadata.entry_points(group=RECIPE_ENTRY_POINT_GROUP),
            key=lambda entry_point: entry_point.name,
        )
        skipped: list[str] = []
        for entry_point in candidates:
            if entry_point.name == recipe_name:
                # Explicitly requested: a load failure here must surface.
                return self._load_entry_point(entry_point)
            try:
                recipe = self._load_entry_point(entry_point)
            except RecipeLoadError:
                # Unrelated broken entry point; remember it for the error message.
                skipped.append(entry_point.name)
                continue
            if recipe.name == recipe_name:
                return recipe

        message = f"No installed Data Designer recipe named {recipe_name!r}."
        if skipped:
            # The requested recipe may be hiding behind one of these failures.
            failed = ", ".join(repr(name) for name in skipped)
            message += f" Entry points that failed to load were skipped: {failed}."
        raise RecipeLoadError(message)

    def _load_entry_point(self, entry_point: importlib.metadata.EntryPoint) -> DataDesignerRecipe:
        """Load one entry point and check that it resolves to a recipe.

        Raises:
            RecipeLoadError: If loading raises, or the loaded object is not a
                ``DataDesignerRecipe`` instance.
        """
        try:
            loaded = entry_point.load()
        except Exception as exc:
            raise RecipeLoadError(f"Failed to load recipe entry point {entry_point.name!r}: {exc}") from exc

        if not isinstance(loaded, DataDesignerRecipe):
            raise RecipeLoadError(
                f"Recipe entry point {entry_point.name!r} returned {type(loaded).__name__}, "
                "expected DataDesignerRecipe."
            )
        return loaded
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from pathlib import Path
from unittest.mock import MagicMock, patch

from pydantic import BaseModel

from data_designer.cli.commands.recipes import run_recipe_command
from data_designer.recipes.recipe import DataDesignerRecipe


# Minimal config model passed as ``config_model`` to the recipe built in the
# test below. Its only field has a default, so an empty mapping validates.
class DemoRecipeConfig(BaseModel):
    """Demo recipe config."""

    value: str = "default"


def test_run_recipe_command_builds_and_creates_recipe() -> None:
    """Create mode builds the config, calls ``create`` and runs postprocess once."""
    builder_stub = MagicMock()
    build_config = MagicMock(return_value=builder_stub)
    postprocess = MagicMock()
    demo_recipe = DataDesignerRecipe(
        name="demo",
        description="Demo recipe",
        config_model=DemoRecipeConfig,
        build_config=build_config,
        postprocess=postprocess,
    )
    artifact_dir = Path("/tmp/artifacts")

    # ``load_dataset`` only needs to return something with a length.
    create_results = MagicMock()
    create_results.load_dataset.return_value = [object(), object()]

    with (
        patch("data_designer.cli.commands.recipes.RecipeRegistry") as registry_cls,
        patch("data_designer.cli.commands.recipes.DataDesigner") as designer_cls,
    ):
        registry = registry_cls.return_value
        designer = designer_cls.return_value
        registry.get_recipe.return_value = demo_recipe
        designer.create.return_value = create_results

        run_recipe_command(
            recipe_name="demo",
            config_path=None,
            mode="create",
            num_records=2,
            dataset_name="demo-dataset",
            artifact_path=artifact_dir,
        )

    registry.get_recipe.assert_called_once_with("demo")
    build_config.assert_called_once()
    designer_cls.assert_called_once_with(artifact_path=artifact_dir)
    designer.create.assert_called_once_with(
        builder_stub,
        num_records=2,
        dataset_name="demo-dataset",
    )
    postprocess.assert_called_once()
Loading
Loading