Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
UniformSamplerParams,
UUIDSamplerParams,
)
from data_designer.config.script_params import DataDesignerScriptParams # noqa: F401
from data_designer.config.seed import ( # noqa: F401
IndexRange,
PartitionBlock,
Expand Down Expand Up @@ -204,6 +205,8 @@
"PartitionBlock": (_MOD_SEED, "PartitionBlock"),
"SamplingStrategy": (_MOD_SEED, "SamplingStrategy"),
"SeedConfig": (_MOD_SEED, "SeedConfig"),
# script params
"DataDesignerScriptParams": (f"{_MOD_BASE}.script_params", "DataDesignerScriptParams"),
# seed_source
"DataFrameSeedSource": (f"{_MOD_BASE}.seed_source_dataframe", "DataFrameSeedSource"),
"AgentRolloutFormat": (_MOD_SEED_SOURCE, "AgentRolloutFormat"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from dataclasses import dataclass


@dataclass(frozen=True, slots=True)
class DataDesignerScriptParams:
"""Runtime parameters forwarded to Python config workflows.

Attributes:
argv: Raw workflow arguments passed after the CLI ``--`` separator.
"""

argv: tuple[str, ...] = ()
22 changes: 16 additions & 6 deletions packages/data-designer/src/data_designer/cli/commands/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,29 @@

from __future__ import annotations

from typing import Annotated

import click
import typer

from data_designer.cli.commands.generation_args import resolve_generation_config_target
from data_designer.cli.controllers.generation_controller import GenerationController
from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
from data_designer.interface.results import SUPPORTED_EXPORT_FORMATS


def create_command(
config_source: str = typer.Argument(
help=(
"Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)"
" that defines a load_config_builder() function."
workflow_args: Annotated[
list[str] | None,
typer.Argument(
metavar="[CONFIG_SOURCE] [-- WORKFLOW_ARGS]",
help=(
"Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)"
" that defines a load_config_builder() function. Extra arguments after '--' are forwarded to Python"
" workflows."
),
),
),
] = None,
num_records: int = typer.Option(
DEFAULT_NUM_RECORDS,
"--num-records",
Expand Down Expand Up @@ -67,9 +75,11 @@ def create_command(
# Create from a Python module with custom output path
data-designer create my_config.py --artifact-path /path/to/output
"""
target = resolve_generation_config_target(workflow_args)
controller = GenerationController()
controller.run_create(
config_source=config_source,
config_source=target.config_source,
workflow_args=target.workflow_args,
num_records=num_records,
dataset_name=dataset_name,
artifact_path=artifact_path,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

from dataclasses import dataclass

import click


@dataclass(frozen=True)
class GenerationConfigTarget:
    """Immutable result of splitting the positional CLI values.

    Shared by the create, preview, and validate commands.

    Attributes:
        config_source: The config location the command should load.
        workflow_args: Any further values, kept in order, to forward to the
            workflow; empty when none were supplied.
    """

    # Location of the config to load.
    config_source: str
    # Extra values to forward, in their original order.
    workflow_args: tuple[str, ...]


def resolve_generation_config_target(raw_args: list[str] | None) -> GenerationConfigTarget:
    """Separate the config source from the arguments meant for the workflow.

    The first positional value is taken as the config source; everything
    after it is kept, in order, as workflow arguments.

    Args:
        raw_args: Positional values collected by the CLI, or ``None`` when
            nothing was supplied.

    Returns:
        A ``GenerationConfigTarget`` holding the config source and a tuple of
        the remaining arguments (empty when there are none).

    Raises:
        click.UsageError: If no positional value was supplied at all.
    """
    positional = tuple(raw_args) if raw_args else ()

    # Guard clause: without at least one value there is no config to load.
    if len(positional) == 0:
        raise click.UsageError("Missing argument 'CONFIG_SOURCE'.")

    source = positional[0]
    forwarded = positional[1:]
    return GenerationConfigTarget(config_source=source, workflow_args=forwarded)
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,28 @@

from __future__ import annotations

from typing import Annotated

import click
import typer

from data_designer.cli.commands.generation_args import resolve_generation_config_target
from data_designer.cli.controllers.generation_controller import GenerationController
from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH, DEFAULT_NUM_RECORDS


def preview_command(
config_source: str = typer.Argument(
help=(
"Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)"
" that defines a load_config_builder() function."
workflow_args: Annotated[
list[str] | None,
typer.Argument(
metavar="[CONFIG_SOURCE] [-- WORKFLOW_ARGS]",
help=(
"Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)"
" that defines a load_config_builder() function. Extra arguments after '--' are forwarded to Python"
" workflows."
),
),
),
] = None,
num_records: int = typer.Option(
DEFAULT_NUM_RECORDS,
"--num-records",
Expand Down Expand Up @@ -54,9 +62,11 @@ def preview_command(
),
) -> None:
"""Generate a preview dataset for fast iteration on your configuration."""
target = resolve_generation_config_target(workflow_args)
controller = GenerationController()
controller.run_preview(
config_source=config_source,
config_source=target.config_source,
workflow_args=target.workflow_args,
num_records=num_records,
non_interactive=non_interactive,
save_results=save_results,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,26 @@

from __future__ import annotations

from typing import Annotated

import typer

from data_designer.cli.commands.generation_args import resolve_generation_config_target
from data_designer.cli.controllers.generation_controller import GenerationController


def validate_command(
config_source: str = typer.Argument(
help=(
"Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)"
" that defines a load_config_builder() function."
workflow_args: Annotated[
list[str] | None,
typer.Argument(
metavar="[CONFIG_SOURCE] [-- WORKFLOW_ARGS]",
help=(
"Path or URL to a config file (.yaml/.yml/.json), or a local Python module (.py)"
" that defines a load_config_builder() function. Extra arguments after '--' are forwarded to Python"
" workflows."
),
),
),
] = None,
) -> None:
"""Validate a Data Designer configuration.

Expand All @@ -31,5 +39,6 @@ def validate_command(
# Validate a Python module
data-designer validate my_config.py
"""
target = resolve_generation_config_target(workflow_args)
controller = GenerationController()
controller.run_validate(config_source=config_source)
controller.run_validate(config_source=target.config_source, workflow_args=target.workflow_args)
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
import typer

from data_designer.cli.ui import console, print_error, print_header, print_success, wait_for_navigation_key
from data_designer.cli.utils.config_loader import ConfigLoadError, load_config_builder
from data_designer.cli.utils.config_loader import ConfigLoadError, WorkflowHelpRequested, load_config_builder
from data_designer.cli.utils.sample_records_pager import PAGER_FILENAME, create_sample_records_pager
from data_designer.config.errors import InvalidConfigError
from data_designer.config.script_params import DataDesignerScriptParams
from data_designer.config.utils.constants import DEFAULT_DISPLAY_WIDTH
from data_designer.interface import DataDesigner
from data_designer.logging import LOG_INDENT
Expand All @@ -31,13 +32,14 @@ class GenerationController:

def run_preview(
self,
config_source: str,
config_source: str | None,
num_records: int,
non_interactive: bool,
save_results: bool = False,
artifact_path: str | None = None,
theme: Literal["dark", "light"] = "dark",
display_width: int = DEFAULT_DISPLAY_WIDTH,
workflow_args: tuple[str, ...] = (),
) -> None:
"""Load config, generate a preview dataset, and display the results.

Expand All @@ -49,8 +51,9 @@ def run_preview(
artifact_path: Directory to save results in, or None for ./artifacts.
theme: Color theme for HTML output (dark or light).
display_width: Maximum width of the rendered record output in characters.
workflow_args: Arguments forwarded to Python config workflows.
"""
config_builder = self._load_config(config_source)
config_builder = self._load_config(config_source, workflow_args=workflow_args)

print_header("Data Designer Preview")
console.print(f" Config: [bold]{config_source}[/bold]")
Expand Down Expand Up @@ -86,13 +89,18 @@ def run_preview(
console.print()
print_success(f"Preview complete — {total} record(s) generated")

def run_validate(self, config_source: str) -> None:
def run_validate(
self,
config_source: str,
workflow_args: tuple[str, ...] = (),
) -> None:
"""Load config and validate it against the engine.

Args:
config_source: Path to a config file or Python module.
workflow_args: Arguments forwarded to Python config workflows.
"""
config_builder = self._load_config(config_source)
config_builder = self._load_config(config_source, workflow_args=workflow_args)

print_header("Data Designer Validate")
console.print(f" Config: [bold]{config_source}[/bold]")
Expand All @@ -112,11 +120,12 @@ def run_validate(self, config_source: str) -> None:

def run_create(
self,
config_source: str,
config_source: str | None,
num_records: int,
dataset_name: str,
artifact_path: str | None,
output_format: str | None = None,
workflow_args: tuple[str, ...] = (),
) -> None:
"""Load config, create a full dataset, and save results to disk.

Expand All @@ -127,8 +136,9 @@ def run_create(
artifact_path: Path where generated artifacts will be stored, or None for default.
output_format: If set, export the dataset to a single file in this format after
generation. One of 'jsonl', 'csv', 'parquet'.
workflow_args: Arguments forwarded to Python config workflows.
"""
config_builder = self._load_config(config_source)
config_builder = self._load_config(config_source, workflow_args=workflow_args)

resolved_artifact_path = Path(artifact_path) if artifact_path else Path.cwd() / "artifacts"

Expand Down Expand Up @@ -174,20 +184,28 @@ def run_create(
print_success(f"Dataset created — {actual_record_count} record(s) generated")
console.print()

def _load_config(self, config_source: str) -> DataDesignerConfigBuilder:
def _load_config(
self,
config_source: str,
workflow_args: tuple[str, ...] = (),
) -> DataDesignerConfigBuilder:
"""Load a config builder from the given source, exiting on failure.

Args:
config_source: Path to a config file or Python module.
workflow_args: Arguments forwarded to Python config workflows.

Returns:
A DataDesignerConfigBuilder instance.

Raises:
typer.Exit: If the config cannot be loaded.
"""
script_params = DataDesignerScriptParams(argv=workflow_args)
try:
return load_config_builder(config_source)
return load_config_builder(config_source, script_params=script_params)
except WorkflowHelpRequested as e:
raise typer.Exit(code=0) from e
except ConfigLoadError as e:
print_error(str(e))
raise typer.Exit(code=1)
Expand Down
Loading
Loading