Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
4ff6a5a
feat: add agent-context CLI introspection
johnnygreco Feb 13, 2026
e6a42c8
fix: correct agent-context field descriptions in column configs
johnnygreco Feb 15, 2026
bf47dfd
feat: enhance pydantic and method inspectors with richer field details
johnnygreco Feb 16, 2026
84f51c8
feat: add Field descriptions and docstrings to config models
johnnygreco Feb 16, 2026
3689979
feat: enhance formatters with rich field display, dedup, and new form…
johnnygreco Feb 16, 2026
1c3ff18
feat: add discovery for namespace tree, interface classes, and imports
johnnygreco Feb 16, 2026
fbae7e0
refactor: rename agent-context CLI to introspect and add new subcommands
johnnygreco Feb 16, 2026
b8efb5b
test: add CLI usage scenario integration tests
johnnygreco Feb 16, 2026
b0797b6
refactor: replace introspect command with types and reference command…
johnnygreco Feb 16, 2026
0abeafb
refactor: update formatters and tests for new types/reference CLI str…
johnnygreco Feb 16, 2026
dd315e8
drop stale review
johnnygreco Feb 16, 2026
7806b2b
refactor: replace hardcoded discovery functions with introspection-ba…
johnnygreco Feb 16, 2026
ef57f80
fix: improve introspection defaults and depth checks
johnnygreco Feb 16, 2026
277a66f
fix: align enum output across text/json and remove dead try/except
johnnygreco Feb 16, 2026
6e5ebc7
fix: surface namespace import failures in debug logs
johnnygreco Feb 16, 2026
072af89
sort
johnnygreco Feb 16, 2026
a0a62d8
refactor introspection discovery and normalize typed schema output
johnnygreco Feb 16, 2026
2a90c97
feat: add data-designer list-assets agent-helper command
johnnygreco Feb 17, 2026
078894e
refactor: replace types/reference commands with inspect agent-helper
johnnygreco Feb 18, 2026
78d897f
feat: add list agent-helper command group
johnnygreco Feb 18, 2026
b73ac3c
docs: clarify that constraints apply only to sampler columns
johnnygreco Feb 18, 2026
90a3f6d
refactor: rename inspect "builder" subcommand to "config_builder"
johnnygreco Feb 18, 2026
63762a7
docs: improve agent-helper CLI help descriptions for agent consumption
johnnygreco Feb 18, 2026
87cba7b
fix: use hyphenated config-builder for CLI subcommand name
johnnygreco Feb 18, 2026
e550a69
docs: tighten agent-helper CLI help descriptions
johnnygreco Feb 18, 2026
4707728
docs: use column header names in list command tips for clarity
johnnygreco Feb 18, 2026
1a055f0
docs: sharpen inspect and list group-level help descriptions
johnnygreco Feb 18, 2026
f5fa650
refactor: remove related_inspect_tip from inspect command output
johnnygreco Feb 18, 2026
8668ab2
refactor: remove dead code from introspection services
johnnygreco Feb 18, 2026
152cc5f
fix: harden introspection service layer
johnnygreco Feb 18, 2026
5c44caa
refactor: clean up IntrospectionController
johnnygreco Feb 18, 2026
352be20
fix: harden ListController and eliminate DRY violation
johnnygreco Feb 18, 2026
aae3e6a
docs: polish help text and field description consistency
johnnygreco Feb 18, 2026
0b9ea11
test: add coverage for introspection edge cases and crash paths
johnnygreco Feb 18, 2026
3572057
refactor: simplify introspection inspectors without changing output
johnnygreco Feb 18, 2026
8479392
refactor: lazy-load inspect CLI commands
johnnygreco Feb 19, 2026
c121a08
fix: restore agent-helper list CLI commands
johnnygreco Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions packages/data-designer-config/src/data_designer/config/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,19 @@ class SingleColumnConfig(ConfigBase, ABC):
name: Unique name of the column to be generated.
drop: If True, the column will be generated but removed from the final dataset.
Useful for intermediate columns that are dependencies for other columns.
allow_resize: If True, the column is allowed to be resized during generation.
column_type: Discriminator field that identifies the specific column type.
Subclasses must override this field to specify the column type with a `Literal` value.
"""

name: str
drop: bool = False
allow_resize: bool = False
column_type: str
name: str = Field(description="Unique name of the column to be generated")
drop: bool = Field(
default=False, description="If True, the column will be generated but removed from the final dataset"
)
allow_resize: bool = Field(
default=False, description="If True, the column is allowed to be resized during generation"
)
column_type: str = Field(description="Discriminator field that identifies the specific column type")

@staticmethod
def get_column_emoji() -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,22 @@ class SamplerColumnConfig(SingleColumnConfig):
```
"""

sampler_type: SamplerType
params: Annotated[SamplerParamsT, Discriminator("sampler_type")]
conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = {}
convert_to: str | None = None
column_type: Literal["sampler"] = "sampler"
sampler_type: SamplerType = Field(
description="Type of sampler to use (e.g., uuid, category, uniform, gaussian, person, datetime)"
)
params: Annotated[SamplerParamsT, Discriminator("sampler_type")] = Field(
description="Parameters specific to the chosen sampler type"
)
conditional_params: dict[str, Annotated[SamplerParamsT, Discriminator("sampler_type")]] = Field(
default_factory=dict,
description="Optional dictionary for conditional parameters; keys are conditions, values are params to use when met",
)
convert_to: str | None = Field(
default=None, description="Optional type conversion after sampling: 'float', 'int', or 'str'"
)
column_type: Literal["sampler"] = Field(
default="sampler", description="Discriminator field, always 'sampler' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -136,14 +147,28 @@ class LLMTextColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "llm-text" for this configuration type.
"""

prompt: str
model_alias: str
system_prompt: str | None = None
multi_modal_context: list[ImageContext] | None = None
tool_alias: str | None = None
with_trace: TraceType = TraceType.NONE
extract_reasoning_content: bool = False
column_type: Literal["llm-text"] = "llm-text"
prompt: str = Field(
description="Jinja2 template for the LLM prompt; can reference other columns via {{ column_name }}"
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth warning here (or somewhere else) about passing f-strings, which could break the Jinja template? I've found that Cursor likes to auto-convert this to f""

)
model_alias: str = Field(description="Alias of the model configuration to use for generation")
system_prompt: str | None = Field(
default=None, description="Optional system prompt to set model behavior and constraints"
)
multi_modal_context: list[ImageContext] | None = Field(
default=None, description="Optional list of ImageContext for vision model inputs"
)
tool_alias: str | None = Field(
default=None, description="Optional alias of the tool configuration to use for MCP tool calls"
)
with_trace: TraceType = Field(
default=TraceType.NONE, description="Trace capture mode: NONE, LAST_MESSAGE, or ALL_MESSAGES"
)
extract_reasoning_content: bool = Field(
default=False, description="If True, capture chain-of-thought in {name}__reasoning_content column"
)
column_type: Literal["llm-text"] = Field(
default="llm-text", description="Discriminator field, always 'llm-text' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -219,8 +244,12 @@ class LLMCodeColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

code_lang: CodeLang
column_type: Literal["llm-code"] = "llm-code"
code_lang: CodeLang = Field(
description="Target programming language or SQL dialect for code extraction from LLM response"
)
column_type: Literal["llm-code"] = Field(
default="llm-code", description="Discriminator field, always 'llm-code' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -252,8 +281,12 @@ class LLMStructuredColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

output_format: dict | type[BaseModel]
column_type: Literal["llm-structured"] = "llm-structured"
output_format: dict | type[BaseModel] = Field(
description="Pydantic model or JSON schema dict defining the expected structured output shape"
)
column_type: Literal["llm-structured"] = Field(
default="llm-structured", description="Discriminator field, always 'llm-structured' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -317,8 +350,12 @@ class LLMJudgeColumnConfig(LLMTextColumnConfig):
column containing the reasoning content from the final assistant response.
"""

scores: list[Score] = Field(..., min_length=1)
column_type: Literal["llm-judge"] = "llm-judge"
scores: list[Score] = Field(
..., min_length=1, description="List of Score objects defining rubric criteria for LLM judge evaluation"
)
column_type: Literal["llm-judge"] = Field(
default="llm-judge", description="Discriminator field, always 'llm-judge' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -341,10 +378,13 @@ class ExpressionColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "expression" for this configuration type.
"""

name: str
expr: str
dtype: Literal["int", "float", "str", "bool"] = "str"
column_type: Literal["expression"] = "expression"
expr: str = Field(description="Jinja2 expression to compute the column value from other columns")
dtype: Literal["int", "float", "str", "bool"] = Field(
default="str", description="Data type for expression result: 'int', 'float', 'str', or 'bool'"
)
column_type: Literal["expression"] = Field(
default="expression", description="Discriminator field, always 'expression' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -410,11 +450,13 @@ class ValidationColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "validation" for this configuration type.
"""

target_columns: list[str]
validator_type: ValidatorType
validator_params: ValidatorParamsT
target_columns: list[str] = Field(description="List of column names to validate")
validator_type: ValidatorType = Field(description="Validation method: 'code', 'local_callable', or 'remote'")
Copy link
Copy Markdown
Contributor

@nabinchha nabinchha Feb 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is 'code', 'local_callable', or 'remote' necessary since it's already strongly typed? Same comment for other similar changes.

validator_params: ValidatorParamsT = Field(description="Validator-specific parameters (e.g., CodeValidatorParams)")
batch_size: int = Field(default=10, ge=1, description="Number of records to process in each batch")
column_type: Literal["validation"] = "validation"
column_type: Literal["validation"] = Field(
default="validation", description="Discriminator field, always 'validation' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -441,7 +483,9 @@ class SeedDatasetColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "seed-dataset" for this configuration type.
"""

column_type: Literal["seed-dataset"] = "seed-dataset"
column_type: Literal["seed-dataset"] = Field(
default="seed-dataset", description="Discriminator field, always 'seed-dataset' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand All @@ -468,9 +512,11 @@ class EmbeddingColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "embedding" for this configuration type.
"""

target_column: str
model_alias: str
column_type: Literal["embedding"] = "embedding"
target_column: str = Field(description="Name of the text column to generate embeddings for")
model_alias: str = Field(description="Alias of the model to use for embedding generation")
column_type: Literal["embedding"] = Field(
default="embedding", description="Discriminator field, always 'embedding' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -502,10 +548,16 @@ class ImageColumnConfig(SingleColumnConfig):
column_type: Discriminator field, always "image" for this configuration type.
"""

prompt: str
model_alias: str
multi_modal_context: list[ImageContext] | None = None
column_type: Literal["image"] = "image"
prompt: str = Field(
description="Jinja2 template for the image generation prompt; can reference other columns via {{ column_name }}"
)
model_alias: str = Field(description="Alias of the model to use for image generation")
multi_modal_context: list[ImageContext] | None = Field(
default=None, description="Optional list of ImageContext for image-to-image generation inputs"
)
column_type: Literal["image"] = Field(
default="image", description="Discriminator field, always 'image' for this configuration type"
)

@staticmethod
def get_column_emoji() -> str:
Expand Down Expand Up @@ -562,7 +614,9 @@ class CustomColumnConfig(SingleColumnConfig):
default=None,
description="Optional typed configuration object passed as second argument to generator function",
)
column_type: Literal["custom"] = "custom"
column_type: Literal["custom"] = Field(
default="custom", description="Discriminator field, always 'custom' for this configuration type"
)

@field_validator("generator_function")
@classmethod
Expand Down
36 changes: 22 additions & 14 deletions packages/data-designer-config/src/data_designer/config/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ class MCPProvider(ConfigBase):
... )
"""

provider_type: Literal["sse", "streamable_http"] = "sse"
name: str
endpoint: str
api_key: str | None = None
provider_type: Literal["sse", "streamable_http"] = Field(default="sse", description="Transport type discriminator")
name: str = Field(description="Unique name used to reference this MCP provider")
endpoint: str = Field(description="SSE endpoint URL for connecting to the remote MCP server")
api_key: str | None = Field(default=None, description="Optional API key for authentication")


class LocalStdioMCPProvider(ConfigBase):
Expand All @@ -74,11 +74,15 @@ class LocalStdioMCPProvider(ConfigBase):
... )
"""

provider_type: Literal["stdio"] = "stdio"
name: str
command: str
args: list[str] = Field(default_factory=list)
env: dict[str, str] = Field(default_factory=dict)
provider_type: Literal["stdio"] = Field(
default="stdio", description="Transport type discriminator, always 'stdio' for local subprocess MCP providers"
)
name: str = Field(description="Unique name used to reference this MCP provider")
command: str = Field(description="Executable to launch the MCP server via stdio transport")
args: list[str] = Field(default_factory=list, description="Arguments passed to the MCP server executable")
env: dict[str, str] = Field(
default_factory=dict, description="Environment variables passed to the MCP server subprocess"
)


MCPProviderT: TypeAlias = Annotated[MCPProvider | LocalStdioMCPProvider, Field(discriminator="provider_type")]
Expand Down Expand Up @@ -113,8 +117,12 @@ class ToolConfig(ConfigBase):
... )
"""

tool_alias: str
providers: list[str]
allow_tools: list[str] | None = None
max_tool_call_turns: int = Field(default=5, ge=1)
timeout_sec: float | None = Field(default=None, gt=0)
tool_alias: str = Field(description="User-defined alias to reference this tool configuration in column configs")
providers: list[str] = Field(description="Names of the MCP providers to use for tool calls")
allow_tools: list[str] | None = Field(
default=None, description="Optional allowlist of tool names that restricts which tools are permitted"
)
max_tool_call_turns: int = Field(
default=5, ge=1, description="Maximum number of tool-calling turns permitted in a single generation"
)
timeout_sec: float | None = Field(default=None, gt=0, description="Timeout in seconds for MCP tool calls")
Loading
Loading