diff --git a/packages/data-designer-config/src/data_designer/config/__init__.py b/packages/data-designer-config/src/data_designer/config/__init__.py index eb385e15a..ad3ab2414 100644 --- a/packages/data-designer-config/src/data_designer/config/__init__.py +++ b/packages/data-designer-config/src/data_designer/config/__init__.py @@ -82,6 +82,7 @@ UniformSamplerParams, UUIDSamplerParams, ) + from data_designer.config.script_params import DataDesignerScriptParams # noqa: F401 from data_designer.config.seed import ( # noqa: F401 IndexRange, PartitionBlock, @@ -204,6 +205,8 @@ "PartitionBlock": (_MOD_SEED, "PartitionBlock"), "SamplingStrategy": (_MOD_SEED, "SamplingStrategy"), "SeedConfig": (_MOD_SEED, "SeedConfig"), + # script params + "DataDesignerScriptParams": (f"{_MOD_BASE}.script_params", "DataDesignerScriptParams"), # seed_source "DataFrameSeedSource": (f"{_MOD_BASE}.seed_source_dataframe", "DataFrameSeedSource"), "AgentRolloutFormat": (_MOD_SEED_SOURCE, "AgentRolloutFormat"), diff --git a/packages/data-designer-config/src/data_designer/config/script_params.py b/packages/data-designer-config/src/data_designer/config/script_params.py new file mode 100644 index 000000000..2c8d2689b --- /dev/null +++ b/packages/data-designer-config/src/data_designer/config/script_params.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from dataclasses import dataclass + + +@dataclass(frozen=True, slots=True) +class DataDesignerScriptParams: + """Runtime parameters forwarded to Python config workflows. + + Attributes: + argv: Raw workflow arguments passed after the CLI ``--`` separator. + """ + + argv: tuple[str, ...] 
= () diff --git a/packages/data-designer/src/data_designer/cli/commands/create.py b/packages/data-designer/src/data_designer/cli/commands/create.py index ea98222ea..343801d35 100644 --- a/packages/data-designer/src/data_designer/cli/commands/create.py +++ b/packages/data-designer/src/data_designer/cli/commands/create.py @@ -3,21 +3,36 @@ from __future__ import annotations +from typing import Annotated + import click import typer +from data_designer.cli.commands.generation_args import resolve_generation_config_target from data_designer.cli.controllers.generation_controller import GenerationController from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS from data_designer.interface.results import SUPPORTED_EXPORT_FORMATS def create_command( - config_source: str = typer.Argument( - help=( - "Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)" - " that defines a load_config_builder() function." + workflow_args: Annotated[ + list[str] | None, + typer.Argument( + metavar="[CONFIG_SOURCE] [-- WORKFLOW_ARGS]", + help=( + "Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)" + " that defines a load_config_builder() function. Extra arguments after '--' are forwarded to Python" + " workflows." 
+ ), ), - ), + ] = None, + recipe: Annotated[ + str | None, + typer.Option( + "--recipe", + help="Name of an installed Data Designer recipe to run instead of a config source.", + ), + ] = None, num_records: int = typer.Option( DEFAULT_NUM_RECORDS, "--num-records", @@ -67,9 +82,12 @@ def create_command( # Create from a Python module with custom output path data-designer create my_config.py --artifact-path /path/to/output """ + target = resolve_generation_config_target(workflow_args, recipe) controller = GenerationController() controller.run_create( - config_source=config_source, + config_source=target.config_source, + recipe=target.recipe, + workflow_args=target.workflow_args, num_records=num_records, dataset_name=dataset_name, artifact_path=artifact_path, diff --git a/packages/data-designer/src/data_designer/cli/commands/generation_args.py b/packages/data-designer/src/data_designer/cli/commands/generation_args.py new file mode 100644 index 000000000..d9e8b7813 --- /dev/null +++ b/packages/data-designer/src/data_designer/cli/commands/generation_args.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from dataclasses import dataclass + +import click + + +@dataclass(frozen=True) +class GenerationConfigTarget: + """Resolved config target for create, preview, and validate commands.""" + + config_source: str | None + recipe: str | None + workflow_args: tuple[str, ...] + + +def resolve_generation_config_target( + raw_args: list[str] | None, + recipe: str | None, +) -> GenerationConfigTarget: + """Split variadic CLI args into a config source or recipe plus workflow args.""" + args = tuple(raw_args or ()) + if recipe is not None: + return GenerationConfigTarget(config_source=None, recipe=recipe, workflow_args=args) + + if not args: + raise click.UsageError("Missing argument 'CONFIG_SOURCE'. 
Provide a config source or use --recipe.") + + config_source, *workflow_args = args + return GenerationConfigTarget(config_source=config_source, recipe=None, workflow_args=tuple(workflow_args)) diff --git a/packages/data-designer/src/data_designer/cli/commands/preview.py b/packages/data-designer/src/data_designer/cli/commands/preview.py index 1c08edf4a..b2ed7ac18 100644 --- a/packages/data-designer/src/data_designer/cli/commands/preview.py +++ b/packages/data-designer/src/data_designer/cli/commands/preview.py @@ -3,20 +3,35 @@ from __future__ import annotations +from typing import Annotated + import click import typer +from data_designer.cli.commands.generation_args import resolve_generation_config_target from data_designer.cli.controllers.generation_controller import GenerationController from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH, DEFAULT_NUM_RECORDS def preview_command( - config_source: str = typer.Argument( - help=( - "Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)" - " that defines a load_config_builder() function." + workflow_args: Annotated[ + list[str] | None, + typer.Argument( + metavar="[CONFIG_SOURCE] [-- WORKFLOW_ARGS]", + help=( + "Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)" + " that defines a load_config_builder() function. Extra arguments after '--' are forwarded to Python" + " workflows." 
+ ), ), - ), + ] = None, + recipe: Annotated[ + str | None, + typer.Option( + "--recipe", + help="Name of an installed Data Designer recipe to run instead of a config source.", + ), + ] = None, num_records: int = typer.Option( DEFAULT_NUM_RECORDS, "--num-records", @@ -54,9 +69,12 @@ def preview_command( ), ) -> None: """Generate a preview dataset for fast iteration on your configuration.""" + target = resolve_generation_config_target(workflow_args, recipe) controller = GenerationController() controller.run_preview( - config_source=config_source, + config_source=target.config_source, + recipe=target.recipe, + workflow_args=target.workflow_args, num_records=num_records, non_interactive=non_interactive, save_results=save_results, diff --git a/packages/data-designer/src/data_designer/cli/commands/recipes.py b/packages/data-designer/src/data_designer/cli/commands/recipes.py new file mode 100644 index 000000000..2a41064d8 --- /dev/null +++ b/packages/data-designer/src/data_designer/cli/commands/recipes.py @@ -0,0 +1,108 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import Annotated + +import click +import typer + +from data_designer.cli.ui import console, print_error, print_header +from data_designer.cli.utils.config_loader import ConfigLoadError +from data_designer.cli.utils.recipe_loader import get_recipe_details, get_recipe_help_text, list_recipes + +OutputFormat = click.Choice(["text", "json"], case_sensitive=False) + + +def list_command( + output: Annotated[ + str, + typer.Option( + "--output", + "-o", + click_type=OutputFormat, + help="Output format.", + ), + ] = "text", +) -> None: + """List installed Data Designer recipes.""" + recipes = list_recipes() + if output == "json": + console.print_json( + data={ + "schema_version": "data-designer.recipes.list.v1", + "recipes": [recipe.to_dict() for recipe in recipes], + } + ) + return + + print_header("Installed Data Designer Recipes") + if not recipes: + console.print(" No recipes found.") + return + + for recipe in recipes: + package = recipe.package or "unknown package" + version = f" {recipe.version}" if recipe.version is not None else "" + console.print(f" [bold]{recipe.name}[/bold] ({package}{version})") + + +def show_command( + recipe_name: Annotated[str, typer.Argument(help="Installed recipe name.")], + output: Annotated[ + str, + typer.Option( + "--output", + "-o", + click_type=OutputFormat, + help="Output format.", + ), + ] = "text", +) -> None: + """Show metadata for an installed Data Designer recipe.""" + try: + details = get_recipe_details(recipe_name) + except ConfigLoadError as exc: + print_error(str(exc)) + raise typer.Exit(code=1) from exc + + if output == "json": + console.print_json( + data={ + "schema_version": "data-designer.recipes.show.v1", + "recipe": details.to_dict(), + } + ) + return + + print_header(f"Recipe: {details.summary.name}") + console.print(f" Entry point: [bold]{details.summary.entry_point}[/bold]") + if details.summary.package is not None: + 
version = f" {details.summary.version}" if details.summary.version is not None else "" + console.print(f" Package: [bold]{details.summary.package}{version}[/bold]") + if details.description is not None: + console.print(f" Description: {details.description}") + + if not details.arguments: + console.print(" Structured argument metadata: unavailable") + return + + console.print(" Arguments:") + for argument in details.arguments: + flags = ", ".join(argument["flags"]) if argument["flags"] else argument["name"] + required = " required" if argument["required"] else "" + default = "" if argument["default"] is None else f" default={argument['default']!r}" + help_text = "" if argument["help"] is None else f" — {argument['help']}" + console.print(f" [bold]{flags}[/bold]{required}{default}{help_text}") + + +def help_command( + recipe_name: Annotated[str, typer.Argument(help="Installed recipe name.")], +) -> None: + """Show recipe-specific workflow argument help.""" + try: + console.print(get_recipe_help_text(recipe_name), markup=False) + except ConfigLoadError as exc: + print_error(str(exc)) + raise typer.Exit(code=1) from exc diff --git a/packages/data-designer/src/data_designer/cli/commands/validate.py b/packages/data-designer/src/data_designer/cli/commands/validate.py index 19d338816..441f9f719 100644 --- a/packages/data-designer/src/data_designer/cli/commands/validate.py +++ b/packages/data-designer/src/data_designer/cli/commands/validate.py @@ -3,18 +3,33 @@ from __future__ import annotations +from typing import Annotated + import typer +from data_designer.cli.commands.generation_args import resolve_generation_config_target from data_designer.cli.controllers.generation_controller import GenerationController def validate_command( - config_source: str = typer.Argument( - help=( - "Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)" - " that defines a load_config_builder() function." 
+ workflow_args: Annotated[ + list[str] | None, + typer.Argument( + metavar="[CONFIG_SOURCE] [-- WORKFLOW_ARGS]", + help=( + "Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)" + " that defines a load_config_builder() function. Extra arguments after '--' are forwarded to Python" + " workflows." + ), + ), + ] = None, + recipe: Annotated[ + str | None, + typer.Option( + "--recipe", + help="Name of an installed Data Designer recipe to validate instead of a config source.", ), - ), + ] = None, ) -> None: """Validate a Data Designer configuration. @@ -31,5 +46,8 @@ def validate_command( # Validate a Python module data-designer validate my_config.py """ + target = resolve_generation_config_target(workflow_args, recipe) controller = GenerationController() - controller.run_validate(config_source=config_source) + controller.run_validate( + config_source=target.config_source, recipe=target.recipe, workflow_args=target.workflow_args + ) diff --git a/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py b/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py index 39c45f5f5..aa40414a0 100644 --- a/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py +++ b/packages/data-designer/src/data_designer/cli/controllers/generation_controller.py @@ -12,9 +12,11 @@ import typer from data_designer.cli.ui import console, print_error, print_header, print_success, wait_for_navigation_key -from data_designer.cli.utils.config_loader import ConfigLoadError, load_config_builder +from data_designer.cli.utils.config_loader import ConfigLoadError, WorkflowHelpRequested, load_config_builder +from data_designer.cli.utils.recipe_loader import load_recipe_config_builder from data_designer.cli.utils.sample_records_pager import PAGER_FILENAME, create_sample_records_pager from data_designer.config.errors import InvalidConfigError +from data_designer.config.script_params import 
DataDesignerScriptParams from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH from data_designer.interface import DataDesigner from data_designer.logging import LOG_INDENT @@ -31,29 +33,33 @@ class GenerationController: def run_preview( self, - config_source: str, + config_source: str | None, num_records: int, non_interactive: bool, save_results: bool = False, artifact_path: str | None = None, theme: Literal["dark", "light"] = "dark", display_width: int = DEFAULT_DISPLAY_WIDTH, + recipe: str | None = None, + workflow_args: tuple[str, ...] = (), ) -> None: """Load config, generate a preview dataset, and display the results. Args: - config_source: Path to a config file or Python module. + config_source: Path to a config file or Python module, or None when using a recipe. num_records: Number of records to generate. non_interactive: If True, display all records at once instead of browsing. save_results: If True, save all preview artifacts to the artifact path. artifact_path: Directory to save results in, or None for ./artifacts. theme: Color theme for HTML output (dark or light). display_width: Maximum width of the rendered record output in characters. + recipe: Installed recipe name, or None when using a config source. + workflow_args: Arguments forwarded to Python config workflows. """ - config_builder = self._load_config(config_source) + config_builder = self._load_config(config_source, recipe=recipe, workflow_args=workflow_args) print_header("Data Designer Preview") - console.print(f" Config: [bold]{config_source}[/bold]") + self._print_config_target(config_source, recipe) console.print(f" Records: [bold]{num_records}[/bold]") console.print() @@ -86,16 +92,23 @@ def run_preview( console.print() print_success(f"Preview complete — {total} record(s) generated") - def run_validate(self, config_source: str) -> None: + def run_validate( + self, + config_source: str | None, + recipe: str | None = None, + workflow_args: tuple[str, ...] 
= (), + ) -> None: """Load config and validate it against the engine. Args: - config_source: Path to a config file or Python module. + config_source: Path to a config file or Python module, or None when using a recipe. + recipe: Installed recipe name, or None when using a config source. + workflow_args: Arguments forwarded to Python config workflows. """ - config_builder = self._load_config(config_source) + config_builder = self._load_config(config_source, recipe=recipe, workflow_args=workflow_args) print_header("Data Designer Validate") - console.print(f" Config: [bold]{config_source}[/bold]") + self._print_config_target(config_source, recipe) console.print() try: @@ -112,28 +125,32 @@ def run_validate(self, config_source: str) -> None: def run_create( self, - config_source: str, + config_source: str | None, num_records: int, dataset_name: str, artifact_path: str | None, output_format: str | None = None, + recipe: str | None = None, + workflow_args: tuple[str, ...] = (), ) -> None: """Load config, create a full dataset, and save results to disk. Args: - config_source: Path to a config file or Python module. + config_source: Path to a config file or Python module, or None when using a recipe. num_records: Number of records to generate. dataset_name: Name for the generated dataset folder. artifact_path: Path where generated artifacts will be stored, or None for default. output_format: If set, export the dataset to a single file in this format after generation. One of 'jsonl', 'csv', 'parquet'. + recipe: Installed recipe name, or None when using a config source. + workflow_args: Arguments forwarded to Python config workflows. 
""" - config_builder = self._load_config(config_source) + config_builder = self._load_config(config_source, recipe=recipe, workflow_args=workflow_args) resolved_artifact_path = Path(artifact_path) if artifact_path else Path.cwd() / "artifacts" print_header("Data Designer Create") - console.print(f" Config: [bold]{config_source}[/bold]") + self._print_config_target(config_source, recipe) console.print(f" Records: [bold]{num_records}[/bold]") console.print(f" Dataset name: [bold]{dataset_name}[/bold]") console.print(f" Artifact path: [bold]{resolved_artifact_path}[/bold]") @@ -174,11 +191,18 @@ def run_create( print_success(f"Dataset created — {actual_record_count} record(s) generated") console.print() - def _load_config(self, config_source: str) -> DataDesignerConfigBuilder: + def _load_config( + self, + config_source: str | None, + recipe: str | None = None, + workflow_args: tuple[str, ...] = (), + ) -> DataDesignerConfigBuilder: """Load a config builder from the given source, exiting on failure. Args: - config_source: Path to a config file or Python module. + config_source: Path to a config file or Python module, or None when using a recipe. + recipe: Installed recipe name, or None when using a config source. + workflow_args: Arguments forwarded to Python config workflows. Returns: A DataDesignerConfigBuilder instance. @@ -186,12 +210,26 @@ def _load_config(self, config_source: str) -> DataDesignerConfigBuilder: Raises: typer.Exit: If the config cannot be loaded. """ + script_params = DataDesignerScriptParams(argv=workflow_args) try: - return load_config_builder(config_source) + if recipe is not None: + return load_recipe_config_builder(recipe, script_params=script_params) + if config_source is None: + raise ConfigLoadError("Missing config source. 
Provide a config source or use --recipe.") + return load_config_builder(config_source, script_params=script_params) + except WorkflowHelpRequested as e: + raise typer.Exit(code=0) from e except ConfigLoadError as e: print_error(str(e)) raise typer.Exit(code=1) + def _print_config_target(self, config_source: str | None, recipe: str | None) -> None: + """Print the config target without echoing forwarded workflow args.""" + if recipe is not None: + console.print(f" Recipe: [bold]{recipe}[/bold]") + return + console.print(f" Config: [bold]{config_source}[/bold]") + def _save_preview_results( self, results: PreviewResults, diff --git a/packages/data-designer/src/data_designer/cli/main.py b/packages/data-designer/src/data_designer/cli/main.py index a6e68f3fa..b813cac2f 100644 --- a/packages/data-designer/src/data_designer/cli/main.py +++ b/packages/data-designer/src/data_designer/cli/main.py @@ -120,6 +120,31 @@ def _is_version_request(args: list[str]) -> bool: no_args_is_help=True, ) +recipes_app = typer.Typer( + name="recipes", + help="Inspect installed Data Designer recipes", + cls=create_lazy_typer_group( + { + "list": { + "module": f"{_CMD}.recipes", + "attr": "list_command", + "help": "List installed recipes", + }, + "show": { + "module": f"{_CMD}.recipes", + "attr": "show_command", + "help": "Show recipe metadata and arguments", + }, + "help": { + "module": f"{_CMD}.recipes", + "attr": "help_command", + "help": "Show recipe-specific argument help", + }, + } + ), + no_args_is_help=True, +) + _AGENT_CMD = f"{_CMD}.agent" @@ -150,6 +175,7 @@ def _build_agent_lazy_group(prefix: str) -> dict[str, dict[str, str]]: # Add setup command groups app.add_typer(config_app, name="config", rich_help_panel="Setup") app.add_typer(download_app, name="download", rich_help_panel="Setup") +app.add_typer(recipes_app, name="recipes", rich_help_panel="Recipes") app.add_typer(agent_app, name="agent", rich_help_panel="Agent") diff --git 
a/packages/data-designer/src/data_designer/cli/utils/config_loader.py b/packages/data-designer/src/data_designer/cli/utils/config_loader.py index 9fe37b9f1..0b1e11f92 100644 --- a/packages/data-designer/src/data_designer/cli/utils/config_loader.py +++ b/packages/data-designer/src/data_designer/cli/utils/config_loader.py @@ -4,11 +4,15 @@ from __future__ import annotations import importlib.util +import inspect import sys +from collections.abc import Callable from pathlib import Path +from typing import Any from urllib.parse import urlparse from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.script_params import DataDesignerScriptParams from data_designer.config.utils.io_helpers import VALID_CONFIG_FILE_EXTENSIONS, is_http_url @@ -16,13 +20,20 @@ class ConfigLoadError(Exception): """Raised when a configuration source cannot be loaded.""" +class WorkflowHelpRequested(Exception): + """Raised when a Python workflow prints help and exits successfully.""" + + PYTHON_EXTENSIONS = {".py"} ALL_SUPPORTED_EXTENSIONS = VALID_CONFIG_FILE_EXTENSIONS | PYTHON_EXTENSIONS USER_MODULE_FUNC_NAME = "load_config_builder" -def load_config_builder(config_source: str) -> DataDesignerConfigBuilder: +def load_config_builder( + config_source: str, + script_params: DataDesignerScriptParams | None = None, +) -> DataDesignerConfigBuilder: """Load a DataDesignerConfigBuilder from a file path or URL. Auto-detects the file type by extension: @@ -32,6 +43,7 @@ def load_config_builder(config_source: str) -> DataDesignerConfigBuilder: Args: config_source: Path or URL to the configuration file, or path to a Python module. + script_params: Optional runtime arguments for Python config workflows. Returns: A DataDesignerConfigBuilder instance. @@ -40,6 +52,7 @@ def load_config_builder(config_source: str) -> DataDesignerConfigBuilder: ConfigLoadError: If the file cannot be loaded or is invalid. 
""" if is_http_url(config_source): + _reject_script_params_for_static_source(config_source, script_params) return _load_from_config_url(config_source) path = Path(config_source) @@ -57,9 +70,10 @@ def load_config_builder(config_source: str) -> DataDesignerConfigBuilder: raise ConfigLoadError(f"Unsupported file extension '{suffix}'. Supported extensions: {supported}") if suffix in VALID_CONFIG_FILE_EXTENSIONS: + _reject_script_params_for_static_source(str(path), script_params) return _load_from_config_file(path) - return _load_from_python_module(path) + return _load_from_python_module(path, script_params) def _load_from_config_url(config_source: str) -> DataDesignerConfigBuilder: @@ -101,7 +115,10 @@ def _load_from_config_file(path: Path | str) -> DataDesignerConfigBuilder: raise ConfigLoadError(f"Failed to load config from '{path}': {e}") from e -def _load_from_python_module(path: Path) -> DataDesignerConfigBuilder: +def _load_from_python_module( + path: Path, + script_params: DataDesignerScriptParams | None = None, +) -> DataDesignerConfigBuilder: """Load a DataDesignerConfigBuilder from a Python module. The module must define a load_config_builder() function that returns @@ -109,6 +126,7 @@ def _load_from_python_module(path: Path) -> DataDesignerConfigBuilder: Args: path: Path to the Python module. + script_params: Optional runtime arguments for Python config workflows. Returns: A DataDesignerConfigBuilder instance. 
@@ -149,10 +167,7 @@ def _load_from_python_module(path: Path) -> DataDesignerConfigBuilder: if not callable(func): raise ConfigLoadError(f"'{USER_MODULE_FUNC_NAME}' in '{path}' is not callable") - try: - config_builder = func() - except Exception as e: - raise ConfigLoadError(f"Error calling '{USER_MODULE_FUNC_NAME}()' in '{path}': {e}") from e + config_builder = call_config_builder_function(func, str(path), script_params) if not isinstance(config_builder, DataDesignerConfigBuilder): raise ConfigLoadError( @@ -162,7 +177,7 @@ def _load_from_python_module(path: Path) -> DataDesignerConfigBuilder: return config_builder - except ConfigLoadError: + except (ConfigLoadError, WorkflowHelpRequested): raise except Exception as e: raise ConfigLoadError(f"Failed to execute Python module '{path}': {e}") from e @@ -178,3 +193,106 @@ def _load_from_python_module(path: Path) -> DataDesignerConfigBuilder: sys.path.remove(parent_dir) except ValueError: pass + + +def call_config_builder_function( + func: Callable[..., Any], + source_name: str, + script_params: DataDesignerScriptParams | None = None, +) -> DataDesignerConfigBuilder: + """Call a user-provided config builder function with a supported signature.""" + params = script_params or DataDesignerScriptParams() + try: + signature = inspect.signature(func) + except (TypeError, ValueError) as e: + raise ConfigLoadError(f"Could not inspect '{USER_MODULE_FUNC_NAME}()' in '{source_name}': {e}") from e + + config_builder: Any + if len(signature.parameters) == 0: + if params.argv: + raise ConfigLoadError( + f"'{USER_MODULE_FUNC_NAME}()' in '{source_name}' does not accept workflow arguments. " + "Update it to accept a DataDesignerScriptParams parameter." 
+ ) + try: + config_builder = func() + except SystemExit as e: + if _is_successful_system_exit(e): + raise WorkflowHelpRequested from e + raise ConfigLoadError(f"'{USER_MODULE_FUNC_NAME}()' in '{source_name}' exited with code {e.code}") from e + except Exception as e: + raise ConfigLoadError(f"Error calling '{USER_MODULE_FUNC_NAME}()' in '{source_name}': {e}") from e + else: + _validate_params_signature(signature, source_name) + try: + config_builder = _call_params_aware_function(func, signature, params) + except SystemExit as e: + if _is_successful_system_exit(e): + raise WorkflowHelpRequested from e + raise ConfigLoadError( + f"'{USER_MODULE_FUNC_NAME}(params)' in '{source_name}' exited with code {e.code}" + ) from e + except Exception as e: + raise ConfigLoadError(f"Error calling '{USER_MODULE_FUNC_NAME}(params)' in '{source_name}': {e}") from e + + if not isinstance(config_builder, DataDesignerConfigBuilder): + raise ConfigLoadError( + f"'{USER_MODULE_FUNC_NAME}()' in '{source_name}' returned " + f"{type(config_builder).__name__}, expected DataDesignerConfigBuilder" + ) + + return config_builder + + +def _validate_params_signature(signature: inspect.Signature, source_name: str) -> None: + parameters = list(signature.parameters.values()) + if len(parameters) != 1: + raise ConfigLoadError( + f"Unsupported '{USER_MODULE_FUNC_NAME}()' signature in '{source_name}'. " + "Expected zero arguments or one DataDesignerScriptParams parameter." + ) + + parameter = parameters[0] + supported_kinds = { + inspect.Parameter.POSITIONAL_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.KEYWORD_ONLY, + } + if parameter.kind not in supported_kinds: + raise ConfigLoadError( + f"Unsupported '{USER_MODULE_FUNC_NAME}()' signature in '{source_name}'. " + "Expected zero arguments or one DataDesignerScriptParams parameter." 
+ ) + + if parameter.kind == inspect.Parameter.KEYWORD_ONLY and parameter.name != "params": + raise ConfigLoadError( + f"Unsupported '{USER_MODULE_FUNC_NAME}()' signature in '{source_name}'. " + "Keyword-only workflow parameters must be named 'params'." + ) + + +def _call_params_aware_function( + func: Callable[..., Any], + signature: inspect.Signature, + params: DataDesignerScriptParams, +) -> Any: + parameter = next(iter(signature.parameters.values())) + if parameter.kind == inspect.Parameter.KEYWORD_ONLY: + return func(params=params) + return func(params) + + +def _reject_script_params_for_static_source( + source_name: str, + script_params: DataDesignerScriptParams | None, +) -> None: + params = script_params or DataDesignerScriptParams() + if params.argv: + raise ConfigLoadError( + f"Workflow arguments are only supported for Python config modules, but '{source_name}' is not a " + "local Python module." + ) + + +def _is_successful_system_exit(exc: SystemExit) -> bool: + return exc.code is None or exc.code == 0 diff --git a/packages/data-designer/src/data_designer/cli/utils/recipe_loader.py b/packages/data-designer/src/data_designer/cli/utils/recipe_loader.py new file mode 100644 index 000000000..9fe148043 --- /dev/null +++ b/packages/data-designer/src/data_designer/cli/utils/recipe_loader.py @@ -0,0 +1,343 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import argparse +import importlib.metadata +import io +import sys +from contextlib import redirect_stdout +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import click +import typer + +from data_designer.cli.utils.config_loader import ( + ConfigLoadError, + WorkflowHelpRequested, + call_config_builder_function, + load_config_builder, +) +from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.script_params import DataDesignerScriptParams + +RECIPE_ENTRY_POINT_GROUP = "data_designer.recipes" + + +@dataclass(frozen=True) +class RecipeSummary: + """Summary of an installed Data Designer recipe.""" + + name: str + entry_point: str + package: str | None + version: str | None + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable dictionary.""" + return { + "name": self.name, + "entry_point": self.entry_point, + "package": self.package, + "version": self.version, + } + + +@dataclass(frozen=True) +class RecipeDetails: + """Inspectable metadata for an installed Data Designer recipe.""" + + summary: RecipeSummary + description: str | None + help_text: str | None + arguments: list[dict[str, Any]] + + def to_dict(self) -> dict[str, Any]: + """Return a JSON-serializable dictionary.""" + return { + **self.summary.to_dict(), + "description": self.description, + "supports_structured_help": self.help_text is not None, + "arguments": self.arguments, + } + + +def load_recipe_config_builder( + recipe_name: str, + script_params: DataDesignerScriptParams | None = None, +) -> DataDesignerConfigBuilder: + """Load a Data Designer recipe from an installed entry point. + + Recipe entry points may expose either a config builder callable or a string + path to a local config source. Callables follow the same + ``load_config_builder(params)`` protocol as Python config files. 
+ """ + entry_point = _get_recipe_entry_point(recipe_name) + try: + loaded = entry_point.load() + except Exception as exc: + raise ConfigLoadError(f"Failed to load recipe {recipe_name!r}: {exc}") from exc + + if isinstance(loaded, str | Path): + return load_config_builder(str(loaded), script_params=script_params) + + if isinstance(loaded, typer.Typer): + return _call_typer_recipe_app(loaded, recipe_name, script_params) + + if callable(loaded): + return call_config_builder_function(loaded, f"recipe {recipe_name!r}", script_params) + + raise ConfigLoadError( + f"Recipe {recipe_name!r} loaded {type(loaded).__name__}, " + "expected a config builder callable, Typer app, or config path." + ) + + +def list_recipes() -> list[RecipeSummary]: + """List installed Data Designer recipe entry points.""" + return [_entry_point_summary(entry_point) for entry_point in _recipe_entry_points()] + + +def get_recipe_details(recipe_name: str) -> RecipeDetails: + """Return inspectable details for an installed recipe.""" + entry_point = _get_recipe_entry_point(recipe_name) + summary = _entry_point_summary(entry_point) + typer_app = load_recipe_typer_app(recipe_name) + if typer_app is not None: + command = typer.main.get_command(typer_app) + return RecipeDetails( + summary=summary, + description=command.help, + help_text=_click_help_text(command, recipe_name), + arguments=_click_arguments(command), + ) + + parser = load_recipe_arg_parser(recipe_name) + + if parser is None: + return RecipeDetails( + summary=summary, + description=None, + help_text=None, + arguments=[], + ) + + return RecipeDetails( + summary=summary, + description=parser.description, + help_text=parser.format_help(), + arguments=_argparse_arguments(parser), + ) + + +def get_recipe_help_text(recipe_name: str) -> str: + """Return recipe-specific help text for an installed recipe.""" + details = get_recipe_details(recipe_name) + if details.help_text is None: + raise ConfigLoadError( + f"Recipe {recipe_name!r} does not expose 
def load_recipe_typer_app(recipe_name: str) -> typer.Typer | None:
    """Return the Typer app associated with a recipe, or ``None`` when it has none.

    The entry point may be a Typer app itself. Otherwise the module that
    defines the loaded object is searched for a ``build_typer_app`` factory.

    Raises:
        ConfigLoadError: If ``build_typer_app`` exists but is not callable, or
            if it returns something other than a Typer app.
    """
    target = _load_recipe_entry_point(_get_recipe_entry_point(recipe_name), recipe_name)
    if isinstance(target, typer.Typer):
        return target

    owner = _loaded_recipe_module(target)
    factory = None if owner is None else getattr(owner, "build_typer_app", None)
    if factory is None:
        return None
    if not callable(factory):
        raise ConfigLoadError(f"Recipe {recipe_name!r} defines build_typer_app, but it is not callable.")

    candidate = factory()
    if isinstance(candidate, typer.Typer):
        return candidate
    raise ConfigLoadError(
        f"Recipe {recipe_name!r} build_typer_app() returned {type(candidate).__name__}, expected Typer."
    )
def _loaded_recipe_module(loaded: Any) -> Any | None:
    """Return the imported module that defines ``loaded``, if it can be found.

    The lookup goes through ``sys.modules`` using the object's ``__module__``
    attribute. ``None`` is returned when that attribute is missing or ``None``,
    or when no module is registered under that name.
    """
    owner_name = getattr(loaded, "__module__", None)
    return None if owner_name is None else sys.modules.get(owner_name)


def _call_typer_recipe_app(
    app: typer.Typer,
    recipe_name: str,
    script_params: DataDesignerScriptParams | None,
) -> DataDesignerConfigBuilder:
    """Run a recipe's Typer app in-process and return the config builder it produces.

    Raises:
        WorkflowHelpRequested: On a zero exit, or when the command returns ``0``
            and ``--help``/``-h`` is among the forwarded arguments.
        ConfigLoadError: On a non-zero exit, a click usage error, or a return
            value that is not a ``DataDesignerConfigBuilder``.
    """
    effective_params = script_params if script_params else DataDesignerScriptParams()
    cli = typer.main.get_command(app)
    try:
        # standalone_mode=False keeps click from calling sys.exit() itself.
        outcome = cli.main(
            args=list(effective_params.argv),
            prog_name=f"data-designer preview/create --recipe {recipe_name} --",
            standalone_mode=False,
        )
    except click.exceptions.Exit as exit_signal:
        if exit_signal.exit_code != 0:
            raise ConfigLoadError(f"Recipe {recipe_name!r} exited with code {exit_signal.exit_code}") from exit_signal
        raise WorkflowHelpRequested from exit_signal
    except click.ClickException as click_error:
        raise ConfigLoadError(
            f"Error parsing recipe {recipe_name!r} arguments: {click_error.format_message()}"
        ) from click_error

    # NOTE(review): presumably click returns the exit code (0 for --help) rather
    # than raising in non-standalone mode - confirm against the click docs.
    help_flags = {"--help", "-h"}
    if outcome == 0 and not help_flags.isdisjoint(effective_params.argv):
        raise WorkflowHelpRequested

    if isinstance(outcome, DataDesignerConfigBuilder):
        return outcome
    raise ConfigLoadError(
        f"Recipe {recipe_name!r} returned {type(outcome).__name__}, expected DataDesignerConfigBuilder"
    )
def _recipe_entry_points() -> list[importlib.metadata.EntryPoint]:
    """Return the entry points registered under the recipe group, sorted by name."""
    discovered = list(importlib.metadata.entry_points(group=RECIPE_ENTRY_POINT_GROUP))
    discovered.sort(key=lambda candidate: candidate.name)
    return discovered


def _load_recipe_entry_point(entry_point: importlib.metadata.EntryPoint, recipe_name: str) -> Any:
    """Load an entry point, re-raising any failure as ``ConfigLoadError``."""
    try:
        loaded = entry_point.load()
    except Exception as error:
        raise ConfigLoadError(f"Failed to load recipe {recipe_name!r}: {error}") from error
    return loaded


def _entry_point_summary(entry_point: importlib.metadata.EntryPoint) -> RecipeSummary:
    """Build a ``RecipeSummary`` from an entry point.

    ``dist`` and ``value`` are read with ``getattr`` defaults, so objects
    lacking them still work (no package/version, empty target string).
    """
    owning_dist = getattr(entry_point, "dist", None)
    recipe_name = entry_point.name
    target = getattr(entry_point, "value", "")
    package = _distribution_metadata(owning_dist, "Name")
    version = _distribution_metadata(owning_dist, "Version")
    return RecipeSummary(name=recipe_name, entry_point=target, package=package, version=version)


def _distribution_metadata(distribution: Any, key: str) -> str | None:
    """Return ``distribution.metadata[key]`` as a string, or ``None`` when unavailable.

    ``None`` is returned for a missing distribution, a missing or ``None``
    ``metadata`` attribute, or a key whose value is ``None``/absent.
    """
    metadata = None if distribution is None else getattr(distribution, "metadata", None)
    if metadata is None:
        return None
    raw = metadata.get(key)
    if raw is None:
        return None
    return str(raw)


def _click_help_text(command: click.Command, recipe_name: str) -> str:
    """Render the help text of a click command under the recipe's CLI invocation name.

    Stdout is captured while the help is generated; the captured output is
    used only when ``get_help`` returns an empty value.
    """
    captured = io.StringIO()
    invocation = f"data-designer preview/create --recipe {recipe_name} --"
    with redirect_stdout(captured):
        with click.Context(command, info_name=invocation) as ctx:
            rendered = command.get_help(ctx)
    if rendered:
        return rendered
    return captured.getvalue()
getattr(parameter, "help", None), + } + ) + return arguments + + +def _click_choices(parameter_type: click.ParamType) -> list[str] | None: + if isinstance(parameter_type, click.Choice): + return list(parameter_type.choices) + return None + + +def _argparse_arguments(parser: argparse.ArgumentParser) -> list[dict[str, Any]]: + arguments: list[dict[str, Any]] = [] + for action in parser._actions: + if action.help == argparse.SUPPRESS: + continue + if isinstance(action, argparse._HelpAction): + continue + arguments.append( + { + "name": action.dest, + "flags": list(action.option_strings), + "required": bool(getattr(action, "required", False)), + "default": _jsonable_default(action.default), + "choices": list(action.choices) if action.choices is not None else None, + "nargs": action.nargs, + "help": action.help, + } + ) + return arguments + + +def _jsonable_default(value: Any) -> Any: + if value is argparse.SUPPRESS: + return None + if isinstance(value, Path): + return str(value) + if isinstance(value, tuple | set): + return list(value) + return value diff --git a/packages/data-designer/tests/cli/commands/test_create_command.py b/packages/data-designer/tests/cli/commands/test_create_command.py index fc779df7c..4db28b52f 100644 --- a/packages/data-designer/tests/cli/commands/test_create_command.py +++ b/packages/data-designer/tests/cli/commands/test_create_command.py @@ -19,12 +19,14 @@ def test_create_command_delegates_to_controller(mock_ctrl_cls: MagicMock) -> Non mock_ctrl_cls.return_value = mock_ctrl create_command( - config_source="config.yaml", num_records=10, dataset_name="dataset", artifact_path=None, output_format=None + workflow_args=["config.yaml"], num_records=10, dataset_name="dataset", artifact_path=None, output_format=None ) mock_ctrl_cls.assert_called_once() mock_ctrl.run_create.assert_called_once_with( config_source="config.yaml", + recipe=None, + workflow_args=(), num_records=10, dataset_name="dataset", artifact_path=None, @@ -39,7 +41,7 @@ def 
test_create_command_passes_custom_options(mock_ctrl_cls: MagicMock) -> None: mock_ctrl_cls.return_value = mock_ctrl create_command( - config_source="config.py", + workflow_args=["config.py", "--seed-path", "seed.jsonl"], num_records=100, dataset_name="my_data", artifact_path="/custom/output", @@ -48,6 +50,8 @@ def test_create_command_passes_custom_options(mock_ctrl_cls: MagicMock) -> None: mock_ctrl.run_create.assert_called_once_with( config_source="config.py", + recipe=None, + workflow_args=("--seed-path", "seed.jsonl"), num_records=100, dataset_name="my_data", artifact_path="/custom/output", @@ -62,11 +66,13 @@ def test_create_command_default_artifact_path_is_none(mock_ctrl_cls: MagicMock) mock_ctrl_cls.return_value = mock_ctrl create_command( - config_source="config.yaml", num_records=5, dataset_name="ds", artifact_path=None, output_format=None + workflow_args=["config.yaml"], num_records=5, dataset_name="ds", artifact_path=None, output_format=None ) mock_ctrl.run_create.assert_called_once_with( config_source="config.yaml", + recipe=None, + workflow_args=(), num_records=5, dataset_name="ds", artifact_path=None, @@ -81,7 +87,7 @@ def test_create_command_passes_output_format(mock_ctrl_cls: MagicMock) -> None: mock_ctrl_cls.return_value = mock_ctrl create_command( - config_source="config.yaml", + workflow_args=["config.yaml"], num_records=10, dataset_name="dataset", artifact_path=None, @@ -90,8 +96,36 @@ def test_create_command_passes_output_format(mock_ctrl_cls: MagicMock) -> None: mock_ctrl.run_create.assert_called_once_with( config_source="config.yaml", + recipe=None, + workflow_args=(), num_records=10, dataset_name="dataset", artifact_path=None, output_format="jsonl", ) + + +@patch("data_designer.cli.commands.create.GenerationController") +def test_create_command_passes_recipe_target(mock_ctrl_cls: MagicMock) -> None: + """Test create_command forwards --recipe and workflow args to the controller.""" + mock_ctrl = MagicMock() + mock_ctrl_cls.return_value = 
mock_ctrl + + create_command( + workflow_args=["--input-dir", "docs"], + recipe="retrieval-sdg", + num_records=10, + dataset_name="dataset", + artifact_path=None, + output_format=None, + ) + + mock_ctrl.run_create.assert_called_once_with( + config_source=None, + recipe="retrieval-sdg", + workflow_args=("--input-dir", "docs"), + num_records=10, + dataset_name="dataset", + artifact_path=None, + output_format=None, + ) diff --git a/packages/data-designer/tests/cli/commands/test_preview_command.py b/packages/data-designer/tests/cli/commands/test_preview_command.py index d9420a094..256f60e20 100644 --- a/packages/data-designer/tests/cli/commands/test_preview_command.py +++ b/packages/data-designer/tests/cli/commands/test_preview_command.py @@ -20,7 +20,7 @@ [ pytest.param( { - "config_source": "config.yaml", + "workflow_args": ["config.yaml"], "num_records": 5, "non_interactive": True, "save_results": False, @@ -32,7 +32,7 @@ ), pytest.param( { - "config_source": "config.yaml", + "workflow_args": ["config.yaml"], "num_records": 10, "non_interactive": False, "save_results": False, @@ -44,7 +44,7 @@ ), pytest.param( { - "config_source": "my_config.py", + "workflow_args": ["my_config.py", "--variant", "compact"], "num_records": 20, "non_interactive": True, "save_results": False, @@ -56,7 +56,7 @@ ), pytest.param( { - "config_source": "config.yaml", + "workflow_args": ["config.yaml"], "num_records": 5, "non_interactive": True, "save_results": True, @@ -68,7 +68,7 @@ ), pytest.param( { - "config_source": "config.yaml", + "workflow_args": ["config.yaml"], "num_records": 5, "non_interactive": True, "save_results": True, @@ -88,8 +88,45 @@ def test_preview_command_delegates_to_controller(mock_ctrl_cls: MagicMock, kwarg preview_command(**kwargs) + expected = { + **kwargs, + "config_source": kwargs["workflow_args"][0], + "recipe": None, + "workflow_args": tuple(kwargs["workflow_args"][1:]), + } + mock_ctrl_cls.assert_called_once() - 
mock_ctrl.run_preview.assert_called_once_with(**kwargs) + mock_ctrl.run_preview.assert_called_once_with(**expected) + + +@patch("data_designer.cli.commands.preview.GenerationController") +def test_preview_command_passes_recipe_target(mock_ctrl_cls: MagicMock) -> None: + """Test preview_command forwards --recipe and workflow args to the controller.""" + mock_ctrl = MagicMock() + mock_ctrl_cls.return_value = mock_ctrl + + preview_command( + workflow_args=["--input-dir", "docs"], + recipe="retrieval-sdg", + num_records=3, + non_interactive=True, + save_results=False, + artifact_path=None, + theme="dark", + display_width=110, + ) + + mock_ctrl.run_preview.assert_called_once_with( + config_source=None, + recipe="retrieval-sdg", + workflow_args=("--input-dir", "docs"), + num_records=3, + non_interactive=True, + save_results=False, + artifact_path=None, + theme="dark", + display_width=110, + ) # --------------------------------------------------------------------------- diff --git a/packages/data-designer/tests/cli/commands/test_validate_command.py b/packages/data-designer/tests/cli/commands/test_validate_command.py index 2447c240d..8f68d88cf 100644 --- a/packages/data-designer/tests/cli/commands/test_validate_command.py +++ b/packages/data-designer/tests/cli/commands/test_validate_command.py @@ -18,10 +18,10 @@ def test_validate_command_delegates_to_controller(mock_ctrl_cls: MagicMock) -> N mock_ctrl = MagicMock() mock_ctrl_cls.return_value = mock_ctrl - validate_command(config_source="config.yaml") + validate_command(workflow_args=["config.yaml"]) mock_ctrl_cls.assert_called_once() - mock_ctrl.run_validate.assert_called_once_with(config_source="config.yaml") + mock_ctrl.run_validate.assert_called_once_with(config_source="config.yaml", recipe=None, workflow_args=()) @patch("data_designer.cli.commands.validate.GenerationController") @@ -30,6 +30,25 @@ def test_validate_command_passes_python_module_source(mock_ctrl_cls: MagicMock) mock_ctrl = MagicMock() 
mock_ctrl_cls.return_value = mock_ctrl - validate_command(config_source="my_config.py") + validate_command(workflow_args=["my_config.py", "--seed-path", "seed.jsonl"]) - mock_ctrl.run_validate.assert_called_once_with(config_source="my_config.py") + mock_ctrl.run_validate.assert_called_once_with( + config_source="my_config.py", + recipe=None, + workflow_args=("--seed-path", "seed.jsonl"), + ) + + +@patch("data_designer.cli.commands.validate.GenerationController") +def test_validate_command_passes_recipe_target(mock_ctrl_cls: MagicMock) -> None: + """Test validate_command forwards --recipe and workflow args to the controller.""" + mock_ctrl = MagicMock() + mock_ctrl_cls.return_value = mock_ctrl + + validate_command(workflow_args=["--input-dir", "docs"], recipe="retrieval-sdg") + + mock_ctrl.run_validate.assert_called_once_with( + config_source=None, + recipe="retrieval-sdg", + workflow_args=("--input-dir", "docs"), + ) diff --git a/packages/data-designer/tests/cli/controllers/test_generation_controller.py b/packages/data-designer/tests/cli/controllers/test_generation_controller.py index 151f2cbb4..cb311aab3 100644 --- a/packages/data-designer/tests/cli/controllers/test_generation_controller.py +++ b/packages/data-designer/tests/cli/controllers/test_generation_controller.py @@ -10,9 +10,10 @@ import typer from data_designer.cli.controllers.generation_controller import GenerationController -from data_designer.cli.utils.config_loader import ConfigLoadError +from data_designer.cli.utils.config_loader import ConfigLoadError, WorkflowHelpRequested from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.errors import InvalidConfigError +from data_designer.config.script_params import DataDesignerScriptParams from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH _CTRL = "data_designer.cli.controllers.generation_controller" @@ -54,7 +55,7 @@ def test_run_preview_success(mock_load_config: MagicMock, mock_dd_cls: 
MagicMock controller = GenerationController() controller.run_preview(config_source="config.yaml", num_records=5, non_interactive=True) - mock_load_config.assert_called_once_with("config.yaml") + mock_load_config.assert_called_once_with("config.yaml", script_params=DataDesignerScriptParams()) mock_dd_cls.assert_called_once() mock_dd.preview.assert_called_once_with(mock_builder, num_records=5) @@ -76,6 +77,32 @@ def test_run_preview_custom_num_records(mock_load_config: MagicMock, mock_dd_cls mock_dd.preview.assert_called_once_with(mock_builder, num_records=20) +@patch(f"{_CTRL}.DataDesigner") +@patch(f"{_CTRL}.load_recipe_config_builder") +def test_run_preview_loads_recipe_target(mock_load_recipe: MagicMock, mock_dd_cls: MagicMock) -> None: + """Test preview loads a recipe target with workflow args.""" + mock_builder = MagicMock(spec=DataDesignerConfigBuilder) + mock_load_recipe.return_value = mock_builder + mock_dd = MagicMock() + mock_dd_cls.return_value = mock_dd + mock_dd.preview.return_value = _make_mock_preview_results(2) + + controller = GenerationController() + controller.run_preview( + config_source=None, + recipe="retrieval-sdg", + workflow_args=("--input-dir", "docs"), + num_records=2, + non_interactive=True, + ) + + mock_load_recipe.assert_called_once_with( + "retrieval-sdg", + script_params=DataDesignerScriptParams(argv=("--input-dir", "docs")), + ) + mock_dd.preview.assert_called_once_with(mock_builder, num_records=2) + + @patch(f"{_CTRL}.load_config_builder") def test_run_preview_config_load_error(mock_load_config: MagicMock) -> None: """Test preview exits with code 1 when config fails to load.""" @@ -88,6 +115,23 @@ def test_run_preview_config_load_error(mock_load_config: MagicMock) -> None: assert exc_info.value.exit_code == 1 +@patch(f"{_CTRL}.load_config_builder") +def test_run_preview_workflow_help_exits_successfully(mock_load_config: MagicMock) -> None: + """Test preview exits with code 0 when workflow help is requested.""" + 
mock_load_config.side_effect = WorkflowHelpRequested() + + controller = GenerationController() + with pytest.raises(typer.Exit) as exc_info: + controller.run_preview( + config_source="config.py", + workflow_args=("--help",), + num_records=10, + non_interactive=True, + ) + + assert exc_info.value.exit_code == 0 + + @patch(f"{_CTRL}.DataDesigner") @patch(f"{_CTRL}.load_config_builder") def test_run_preview_generation_fails(mock_load_config: MagicMock, mock_dd_cls: MagicMock) -> None: @@ -612,7 +656,7 @@ def test_run_validate_success(mock_load_config: MagicMock, mock_dd_cls: MagicMoc controller = GenerationController() controller.run_validate(config_source="config.yaml") - mock_load_config.assert_called_once_with("config.yaml") + mock_load_config.assert_called_once_with("config.yaml", script_params=DataDesignerScriptParams()) mock_dd_cls.assert_called_once() mock_dd.validate.assert_called_once_with(mock_builder) @@ -680,7 +724,7 @@ def test_run_create_success(mock_load_config: MagicMock, mock_dd_cls: MagicMock) controller = GenerationController() controller.run_create(config_source="config.yaml", num_records=10, dataset_name="dataset", artifact_path=None) - mock_load_config.assert_called_once_with("config.yaml") + mock_load_config.assert_called_once_with("config.yaml", script_params=DataDesignerScriptParams()) mock_dd_cls.assert_called_once_with(artifact_path=Path.cwd() / "artifacts") mock_dd.create.assert_called_once_with(mock_builder, num_records=10, dataset_name="dataset") diff --git a/packages/data-designer/tests/cli/test_main.py b/packages/data-designer/tests/cli/test_main.py index 15349d8e6..40ae6666d 100644 --- a/packages/data-designer/tests/cli/test_main.py +++ b/packages/data-designer/tests/cli/test_main.py @@ -9,6 +9,7 @@ from typer.testing import CliRunner from data_designer.cli.main import app, main +from data_designer.cli.utils.recipe_loader import RecipeSummary from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS runner = CliRunner() @@ 
-81,8 +82,73 @@ def test_app_dispatches_lazy_create_command(mock_controller_cls: Mock) -> None: assert result.exit_code == 0 mock_controller.run_create.assert_called_once_with( config_source="config.yaml", + recipe=None, + workflow_args=(), num_records=DEFAULT_NUM_RECORDS, dataset_name="dataset", artifact_path=None, output_format=None, ) + + +@patch("data_designer.cli.commands.preview.GenerationController") +def test_app_dispatches_lazy_preview_command_with_recipe_args(mock_controller_cls: Mock) -> None: + """The Typer app forwards workflow args after -- when using --recipe.""" + mock_controller = Mock() + mock_controller_cls.return_value = mock_controller + + result = runner.invoke(app, ["preview", "--recipe", "retrieval-sdg", "--", "--input-dir", "docs"]) + + assert result.exit_code == 0 + mock_controller.run_preview.assert_called_once_with( + config_source=None, + recipe="retrieval-sdg", + workflow_args=("--input-dir", "docs"), + num_records=DEFAULT_NUM_RECORDS, + non_interactive=False, + save_results=False, + artifact_path=None, + theme="dark", + display_width=110, + ) + + +@patch("data_designer.cli.commands.preview.GenerationController") +def test_app_dispatches_lazy_preview_command_with_workflow_args(mock_controller_cls: Mock) -> None: + """The Typer app forwards workflow args after -- for Python config workflows.""" + mock_controller = Mock() + mock_controller_cls.return_value = mock_controller + + result = runner.invoke(app, ["preview", "config.py", "--", "--seed-path", "seed.jsonl"]) + + assert result.exit_code == 0 + mock_controller.run_preview.assert_called_once_with( + config_source="config.py", + recipe=None, + workflow_args=("--seed-path", "seed.jsonl"), + num_records=DEFAULT_NUM_RECORDS, + non_interactive=False, + save_results=False, + artifact_path=None, + theme="dark", + display_width=110, + ) + + +@patch("data_designer.cli.commands.recipes.list_recipes") +def test_app_dispatches_lazy_recipes_list_command(mock_list_recipes: Mock) -> None: + """The 
Typer app dispatches recipe inspection commands through the lazy loader.""" + mock_list_recipes.return_value = [ + RecipeSummary( + name="demo", + entry_point="demo_package.recipe:load_config_builder", + package="demo-package", + version="1.0.0", + ) + ] + + result = runner.invoke(app, ["recipes", "list"]) + + assert result.exit_code == 0 + assert "demo" in result.output + mock_list_recipes.assert_called_once_with() diff --git a/packages/data-designer/tests/cli/utils/test_config_loader.py b/packages/data-designer/tests/cli/utils/test_config_loader.py index e008290b0..bdf530406 100644 --- a/packages/data-designer/tests/cli/utils/test_config_loader.py +++ b/packages/data-designer/tests/cli/utils/test_config_loader.py @@ -10,9 +10,11 @@ from data_designer.cli.utils.config_loader import ( ConfigLoadError, + WorkflowHelpRequested, load_config_builder, ) from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.script_params import DataDesignerScriptParams @patch("data_designer.cli.utils.config_loader.DataDesignerConfigBuilder.from_config") @@ -97,7 +99,7 @@ def test_load_config_builder_from_python_module(tmp_path: Path) -> None: result = load_config_builder(str(py_file)) - mock_load_py.assert_called_once_with(py_file) + mock_load_py.assert_called_once_with(py_file, None) assert result is mock_builder @@ -206,6 +208,78 @@ def test_load_config_builder_python_module_sibling_import(tmp_path: Path) -> Non assert result._test_marker == "my_dataset" +def test_load_config_builder_python_module_receives_script_params(tmp_path: Path) -> None: + """Test that a params-aware Python config receives workflow arguments.""" + py_file = tmp_path / "params_config.py" + py_file.write_text( + "from data_designer.config.config_builder import DataDesignerConfigBuilder\n\n" + "def load_config_builder(params):\n" + " builder = DataDesignerConfigBuilder()\n" + " builder._test_argv = params.argv\n" + " return builder\n" + ) + + result = 
load_config_builder( + str(py_file), + script_params=DataDesignerScriptParams(argv=("--seed-path", "seed.jsonl")), + ) + + assert isinstance(result, DataDesignerConfigBuilder) + assert result._test_argv == ("--seed-path", "seed.jsonl") + + +def test_load_config_builder_python_module_preserves_argparse_help_exit(tmp_path: Path) -> None: + """Test that argparse --help exits cleanly instead of being treated as a load error.""" + py_file = tmp_path / "help_config.py" + py_file.write_text( + "import argparse\n" + "from data_designer.config.config_builder import DataDesignerConfigBuilder\n\n" + "def load_config_builder(params):\n" + " parser = argparse.ArgumentParser()\n" + " parser.add_argument('--seed-path')\n" + " parser.parse_args(list(params.argv))\n" + " return DataDesignerConfigBuilder()\n" + ) + + with pytest.raises(WorkflowHelpRequested): + load_config_builder(str(py_file), script_params=DataDesignerScriptParams(argv=("--help",))) + + +def test_load_config_builder_python_module_rejects_args_for_legacy_function(tmp_path: Path) -> None: + """Test that a no-arg Python config fails clearly when workflow args are supplied.""" + py_file = tmp_path / "legacy_config.py" + py_file.write_text( + "from data_designer.config.config_builder import DataDesignerConfigBuilder\n\n" + "def load_config_builder():\n" + " return DataDesignerConfigBuilder()\n" + ) + + with pytest.raises(ConfigLoadError, match="does not accept workflow arguments"): + load_config_builder(str(py_file), script_params=DataDesignerScriptParams(argv=("--seed-path", "seed.jsonl"))) + + +def test_load_config_builder_rejects_script_params_for_yaml(tmp_path: Path) -> None: + """Test that static YAML configs cannot receive workflow args.""" + yaml_file = tmp_path / "config.yaml" + yaml_file.write_text("data_designer:\n columns: []\n") + + with pytest.raises(ConfigLoadError, match="Workflow arguments are only supported"): + load_config_builder(str(yaml_file), 
script_params=DataDesignerScriptParams(argv=("--seed-path", "seed.jsonl"))) + + +def test_load_config_builder_python_module_rejects_unsupported_signature(tmp_path: Path) -> None: + """Test that Python config modules must use the supported workflow signature.""" + py_file = tmp_path / "bad_signature.py" + py_file.write_text( + "from data_designer.config.config_builder import DataDesignerConfigBuilder\n\n" + "def load_config_builder(first, second):\n" + " return DataDesignerConfigBuilder()\n" + ) + + with pytest.raises(ConfigLoadError, match="Unsupported 'load_config_builder\\(\\)' signature"): + load_config_builder(str(py_file)) + + def test_load_config_builder_python_module_cleans_sys_path(tmp_path: Path) -> None: """Test that the config's parent directory is removed from sys.path after loading.""" import sys diff --git a/packages/data-designer/tests/cli/utils/test_recipe_loader.py b/packages/data-designer/tests/cli/utils/test_recipe_loader.py new file mode 100644 index 000000000..9fc8f604b --- /dev/null +++ b/packages/data-designer/tests/cli/utils/test_recipe_loader.py @@ -0,0 +1,199 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
class FakeEntryPoint:
    """Stand-in for an installed entry point: a name plus the object ``load()`` returns."""

    def __init__(self, name: str, loaded: Any) -> None:
        self.name = name
        self._loaded = loaded

    def load(self) -> Any:
        """Return the object supplied at construction time."""
        return self._loaded


def build_arg_parser() -> argparse.ArgumentParser:
    """Create the demo parser that recipe inspection tests read metadata from."""
    parser = argparse.ArgumentParser(prog="demo", description="Demo recipe.")
    option_specs = (
        ("--seed-path", {"required": True, "help": "Seed dataset path."}),
        ("--variant", {"default": "compact", "choices": ["compact", "verbose"], "help": "Prompt variant."}),
    )
    for flag, settings in option_specs:
        parser.add_argument(flag, **settings)
    return parser


def demo_recipe(params: DataDesignerScriptParams) -> DataDesignerConfigBuilder:
    """Return a fresh builder tagged with the argv it was called with."""
    tagged_builder = DataDesignerConfigBuilder()
    # Stash the forwarded arguments so tests can assert on them.
    tagged_builder._test_argv = params.argv
    return tagged_builder
def test_load_recipe_config_builder_loads_callable_recipe() -> None:
    """A recipe entry point that is a plain callable receives the forwarded argv."""
    forwarded = ("--seed-path", "seed.jsonl")
    installed = [FakeEntryPoint("demo", demo_recipe)]

    with patch("data_designer.cli.utils.recipe_loader.importlib.metadata.entry_points", return_value=installed):
        builder = load_recipe_config_builder("demo", script_params=DataDesignerScriptParams(argv=forwarded))

    assert isinstance(builder, DataDesignerConfigBuilder)
    assert builder._test_argv == ("--seed-path", "seed.jsonl")
def test_get_recipe_details_reads_typer_metadata() -> None:
    """Recipe details expose argument metadata derived from a module-level Typer factory."""
    module, recipe = make_typer_recipe_module("test_typer_recipe_details")
    installed = [FakeEntryPoint("demo", recipe)]

    try:
        with patch("data_designer.cli.utils.recipe_loader.importlib.metadata.entry_points", return_value=installed):
            details = get_recipe_details("demo")
    finally:
        # The helper registered a temporary module; always unregister it.
        sys.modules.pop(module.__name__, None)

    assert details.description == "Build the demo recipe."
    assert details.help_text is not None
    assert "Seed dataset path." in details.help_text
    seed_argument = details.arguments[0]
    assert seed_argument["name"] == "seed_path"
    assert seed_argument["flags"] == ["--seed-path"]
    assert seed_argument["required"] is True
    variant_argument = details.arguments[1]
    assert variant_argument["choices"] == ["compact", "verbose"]


def test_get_recipe_help_text_returns_argparse_help() -> None:
    """Recipe help text is the argparse-formatted help of the recipe's parser."""
    installed = [FakeEntryPoint("demo", demo_recipe)]

    with patch("data_designer.cli.utils.recipe_loader.importlib.metadata.entry_points", return_value=installed):
        rendered = get_recipe_help_text("demo")

    for expected_fragment in ("usage: demo", "--seed-path"):
        assert expected_fragment in rendered
@patch("data_designer.cli.utils.recipe_loader.load_config_builder")
def test_load_recipe_config_builder_loads_config_path(mock_load_config_builder: MagicMock) -> None:
    """A recipe entry point that is a path is delegated to ``load_config_builder``."""
    expected_builder = MagicMock(spec=DataDesignerConfigBuilder)
    mock_load_config_builder.return_value = expected_builder
    forwarded_params = DataDesignerScriptParams(argv=("--seed-path", "seed.jsonl"))
    installed = [FakeEntryPoint("demo", Path("workflow.py"))]

    with patch("data_designer.cli.utils.recipe_loader.importlib.metadata.entry_points", return_value=installed):
        loaded_builder = load_recipe_config_builder("demo", script_params=forwarded_params)

    mock_load_config_builder.assert_called_once_with("workflow.py", script_params=forwarded_params)
    assert loaded_builder is expected_builder


def test_load_recipe_config_builder_errors_for_missing_recipe() -> None:
    """Requesting a recipe that is not installed raises a descriptive load error."""
    with (
        patch("data_designer.cli.utils.recipe_loader.importlib.metadata.entry_points", return_value=[]),
        pytest.raises(ConfigLoadError, match="No installed Data Designer recipe named 'missing'"),
    ):
        load_recipe_config_builder("missing")


def test_load_recipe_config_builder_rejects_invalid_entry_point_object() -> None:
    """An entry point that is neither callable, Typer app nor path is rejected."""
    installed = [FakeEntryPoint("bad", object())]

    with (
        patch("data_designer.cli.utils.recipe_loader.importlib.metadata.entry_points", return_value=installed),
        pytest.raises(ConfigLoadError, match="expected a config builder callable, Typer app, or config path"),
    ):
        load_recipe_config_builder("bad")