From c3b8960ff83119829b8d03f089e9e0bf5fb871f7 Mon Sep 17 00:00:00 2001 From: Mike Knepper Date: Mon, 22 Jun 2026 09:39:30 -0500 Subject: [PATCH 1/4] Agent initial impl Signed-off-by: Mike Knepper --- .../latest/pages/concepts/seed-datasets.mdx | 2 +- .../src/data_designer/config/seed_source.py | 37 ++++++++++++-- .../tests/config/test_seed_source.py | 21 ++++---- .../src/data_designer/engine/compiler.py | 7 ++- .../engine/resources/seed_reader.py | 48 ++++++++++++++++++- .../engine/resources/test_seed_reader.py | 15 ++++-- .../tests/engine/test_compiler.py | 16 ++++++- 7 files changed, 121 insertions(+), 25 deletions(-) diff --git a/fern/versions/latest/pages/concepts/seed-datasets.mdx b/fern/versions/latest/pages/concepts/seed-datasets.mdx index a9498caeb..5e948b228 100644 --- a/fern/versions/latest/pages/concepts/seed-datasets.mdx +++ b/fern/versions/latest/pages/concepts/seed-datasets.mdx @@ -138,7 +138,7 @@ Directory-backed seed datasets expose these columns: Filesystem matching -`file_pattern` matches file names only, not relative paths. `recursive=True` is the default, so nested subdirectories are searched unless you turn it off. +`file_pattern` matches file names only, not relative paths. `recursive=True` is the default, so nested subdirectories are searched unless you turn it off. Relative local `path` values are resolved by the active filesystem provider when the seed is validated or read, not when the config object is constructed. ### 📄 FileContentsSeedSource diff --git a/packages/data-designer-config/src/data_designer/config/seed_source.py b/packages/data-designer-config/src/data_designer/config/seed_source.py index 57a7eb9fc..ca1f6b2ba 100644 --- a/packages/data-designer-config/src/data_designer/config/seed_source.py +++ b/packages/data-designer-config/src/data_designer/config/seed_source.py @@ -100,9 +100,9 @@ class FileSystemSeedSource(SeedSource, ABC): ``FileSystemSeedReader`` implementation. Attributes: - path: Directory containing seed artifacts. Relative paths are resolved - from the current working directory when the config is loaded, not - from the config file location. + path: Directory containing seed artifacts. Relative local paths are + resolved by the active filesystem provider when the seed is + validated or read, not when the config object is constructed. file_pattern: Case-sensitive filename pattern used to match files under the provided directory. Patterns match basenames only, not relative paths. Defaults to ``'*'``. @@ -115,8 +115,8 @@ class FileSystemSeedSource(SeedSource, ABC): path: str = Field( ..., description=( - "Directory containing seed artifacts. Relative paths are resolved from the current working " - "directory when the config is loaded, not from the config file location." + "Directory containing seed artifacts. Relative local paths are resolved by the active filesystem " + "provider when the seed is validated or read, not when the config object is constructed." ), ) file_pattern: str = Field( @@ -155,6 +155,13 @@ def validate_file_pattern(cls, value: str | None) -> str | None: class DirectorySeedSource(FileSystemSeedSource): seed_type: Literal["directory"] = "directory" + def model_post_init(self, __context: Any) -> None: + self._runtime_path = self.path + + @property + def runtime_path(self) -> str: + return self.path + class FileContentsSeedSource(FileSystemSeedSource): seed_type: Literal["file_contents"] = "file_contents" @@ -172,6 +179,13 @@ def validate_encoding(cls, value: str) -> str: raise ValueError(f"🛑 Unknown encoding: {value!r}. Use a valid Python codec name.") from error return value + def model_post_init(self, __context: Any) -> None: + self._runtime_path = self.path + + @property + def runtime_path(self) -> str: + return self.path + def _resolve_filesystem_runtime_path(path: str) -> str: return str(Path(path).expanduser().resolve()) @@ -203,6 +217,15 @@ def get_pi_coding_agent_default_path() -> str: def _validate_filesystem_seed_source_path(value: str | None) -> str | None: + if value is None: + return None + if not value.strip(): + raise InvalidFilePathError("🛑 FileSystemSeedSource.path must be a non-empty string.") + return value + + +def _validate_local_filesystem_seed_source_path(value: str | None) -> str | None: + value = _validate_filesystem_seed_source_path(value) if value is None: return None path = Path(value).expanduser().resolve() @@ -273,6 +296,10 @@ class AgentRolloutSeedSource(FileSystemSeedSource): ), ) + @field_validator("path", mode="after") + def validate_path(cls, value: str | None) -> str | None: + return _validate_local_filesystem_seed_source_path(value) + @model_validator(mode="after") def validate_runtime_path_source(self) -> Self: default_path, _ = get_agent_rollout_format_defaults(self.format) diff --git a/packages/data-designer-config/tests/config/test_seed_source.py b/packages/data-designer-config/tests/config/test_seed_source.py index a2c46747d..d886c9ca3 100644 --- a/packages/data-designer-config/tests/config/test_seed_source.py +++ b/packages/data-designer-config/tests/config/test_seed_source.py @@ -95,12 +95,14 @@ def test_dataframe_seed_source_serialization() -> None: assert serialized == {"seed_type": "df"} -def test_directory_seed_source_requires_directory(tmp_path: Path) -> None: +def test_directory_seed_source_defers_directory_existence_validation(tmp_path: Path) -> None: file_path = tmp_path / "file.txt" file_path.write_text("alpha", encoding="utf-8") - with pytest.raises(InvalidFilePathError, match="is not a directory"): - DirectorySeedSource(path=str(file_path)) + source = DirectorySeedSource(path=str(file_path)) + + assert source.path == str(file_path) + assert source.runtime_path == str(file_path) def test_directory_seed_source_preserves_relative_path_input(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: @@ -146,7 +148,7 @@ def test_file_contents_seed_source_preserves_relative_path_input( pytest.param(FileContentsSeedSource, {"file_pattern": "*.txt"}, id="file-contents"), ], ) -def test_filesystem_seed_sources_cache_runtime_path_across_cwd_changes( +def test_filesystem_seed_sources_preserve_raw_runtime_path_across_cwd_changes( source_type: type[DirectorySeedSource] | type[FileContentsSeedSource], source_kwargs: dict[str, str], tmp_path: Path, @@ -160,12 +162,11 @@ def test_filesystem_seed_sources_cache_runtime_path_across_cwd_changes( monkeypatch.chdir(initial_root) source = source_type(path="seed-dir", **source_kwargs) - expected_runtime_path = str(initial_seed_dir.resolve()) monkeypatch.chdir(later_root) assert source.path == "seed-dir" - assert source.runtime_path == expected_runtime_path + assert source.runtime_path == "seed-dir" assert source.model_dump(mode="json")["path"] == "seed-dir" @@ -176,10 +177,10 @@ def test_seed_source_path_descriptions_document_cwd_resolution() -> None: assert "current working directory" in local_path_description assert "config file location" in local_path_description - assert "current working directory" in directory_path_description - assert "config file location" in directory_path_description - assert "current working directory" in file_contents_path_description - assert "config file location" in file_contents_path_description + assert "active filesystem provider" in directory_path_description + assert "config object is constructed" in directory_path_description + assert "active filesystem provider" in file_contents_path_description + assert "config object is constructed" in file_contents_path_description def test_file_contents_seed_source_parses_from_dict(tmp_path: Path) -> None: diff --git a/packages/data-designer-engine/src/data_designer/engine/compiler.py b/packages/data-designer-engine/src/data_designer/engine/compiler.py index fa3917269..bef430f92 100644 --- a/packages/data-designer-engine/src/data_designer/engine/compiler.py +++ b/packages/data-designer-engine/src/data_designer/engine/compiler.py @@ -10,7 +10,7 @@ from data_designer.config.errors import InvalidConfigError from data_designer.config.sampler_params import UUIDSamplerParams from data_designer.engine.resources.resource_provider import ResourceProvider -from data_designer.engine.resources.seed_reader import SeedReader +from data_designer.engine.resources.seed_reader import SeedReader, SeedReaderConfigError from data_designer.engine.validation import ViolationLevel, rich_print_violations, validate_data_designer_config logger = logging.getLogger(__name__) @@ -31,7 +31,10 @@ def _resolve_and_add_seed_columns(config: DataDesignerConfig, seed_reader: SeedR if not seed_reader: return - seed_col_names = seed_reader.get_column_names() + try: + seed_col_names = seed_reader.get_column_names() + except SeedReaderConfigError as error: + raise InvalidConfigError(str(error)) from error existing_columns = {column.name for column in config.columns} colliding_columns = {name for name in seed_col_names if name in existing_columns} if colliding_columns: diff --git a/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py b/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py index 8f2574cf2..dedf15122 100644 --- a/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py +++ b/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py @@ -9,7 +9,7 @@ from copy import copy from dataclasses import dataclass from fnmatch import fnmatchcase -from pathlib import Path, PurePosixPath +from pathlib import Path, PurePath, PurePosixPath from typing import TYPE_CHECKING, Any, ClassVar, Generic, Protocol, TypeVar, get_args, get_origin from fsspec.implementations.dirfs import DirFileSystem @@ -50,12 +50,37 @@ class SeedReaderError(DataDesignerError): ... +class SeedReaderConfigError(SeedReaderError): ... + + @dataclass(frozen=True) class SeedReaderFileSystemContext: """Filesystem and root path available to filesystem seed-reader plugins.""" fs: AbstractFileSystem - root_path: Path + root_path: PurePath + + +class FileSystemProvider(Protocol): + """Resolves a runtime path into a rooted fsspec filesystem.""" + + def create_context(self, *, runtime_path: str) -> SeedReaderFileSystemContext: ... + + def ensure_root_exists(self, *, runtime_path: str) -> None: ... + + +class LocalFileSystemProvider: + """Default filesystem provider backed by the local disk.""" + + def create_context(self, *, runtime_path: str) -> SeedReaderFileSystemContext: + resolved_root_path = Path(runtime_path).expanduser().resolve() + rooted_fs = DirFileSystem(path=str(resolved_root_path), fs=LocalFileSystem()) + return SeedReaderFileSystemContext(fs=rooted_fs, root_path=resolved_root_path) + + def ensure_root_exists(self, *, runtime_path: str) -> None: + resolved_root_path = Path(runtime_path).expanduser().resolve() + if not resolved_root_path.is_dir(): + raise SeedReaderConfigError(f"🛑 Seed source directory '{resolved_root_path}' does not exist.") class SeedReaderBatch(Protocol): @@ -388,12 +413,23 @@ class FileSystemSeedReader(SeedReader[FileSystemSourceT], ABC): output_columns: ClassVar[list[str] | None] = None + def __init__(self, fs_provider: FileSystemProvider | None = None) -> None: + self._fs_provider = fs_provider or LocalFileSystemProvider() + def _reset_attachment_state(self) -> None: super()._reset_attachment_state() self._filesystem_context = None self._output_df = None self._row_manifest_df = None + def create_filesystem_context(self, root_path: Path | str) -> SeedReaderFileSystemContext: + """Create a rooted filesystem context for directory-backed seed readers. + + This hook is preserved for existing plugin readers. New host integrations + should prefer passing a ``FileSystemProvider`` to the reader constructor. + """ + return self._get_fs_provider().create_context(runtime_path=str(root_path)) + def create_duckdb_connection(self) -> duckdb.DuckDBPyConnection: return self.create_dataframe_duckdb_connection( table_name=self.get_dataset_uri(), @@ -495,10 +531,18 @@ def _get_filesystem_context(self) -> SeedReaderFileSystemContext: self._ensure_attached() context = getattr(self, "_filesystem_context", None) if context is None: + self._get_fs_provider().ensure_root_exists(runtime_path=self.source.runtime_path) context = self.create_filesystem_context(self.source.runtime_path) self._filesystem_context = context return context + def _get_fs_provider(self) -> FileSystemProvider: + provider = getattr(self, "_fs_provider", None) + if provider is None: + provider = LocalFileSystemProvider() + self._fs_provider = provider + return provider + def _get_manifest_dataset_uri(self) -> str: return self._build_internal_table_name("manifest") diff --git a/packages/data-designer-engine/tests/engine/resources/test_seed_reader.py b/packages/data-designer-engine/tests/engine/resources/test_seed_reader.py index 979e66a12..717a29787 100644 --- a/packages/data-designer-engine/tests/engine/resources/test_seed_reader.py +++ b/packages/data-designer-engine/tests/engine/resources/test_seed_reader.py @@ -738,7 +738,7 @@ def test_local_file_seed_reader_uses_load_time_runtime_path_when_cwd_changes( assert list(df["value"]) == [1] -def test_directory_seed_reader_uses_load_time_runtime_path_when_cwd_changes( +def test_directory_seed_reader_uses_read_time_runtime_path_when_cwd_changes( tmp_path: Path, monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -760,8 +760,17 @@ def test_directory_seed_reader_uses_load_time_runtime_path_when_cwd_changes( df = reader.create_duckdb_connection().execute(f"SELECT * FROM '{reader.get_dataset_uri()}'").df() assert source.path == "seed-dir" - assert list(df["relative_path"]) == ["alpha.txt"] - assert list(df["source_path"]) == [str((initial_seed_dir / "alpha.txt").resolve())] + assert list(df["relative_path"]) == ["beta.txt"] + assert list(df["source_path"]) == [str((later_seed_dir / "beta.txt").resolve())] + + +def test_directory_seed_reader_reports_missing_root_before_matching_files(tmp_path: Path) -> None: + missing_dir = tmp_path / "missing" + reader = DirectorySeedReader() + reader.attach(DirectorySeedSource(path=str(missing_dir), file_pattern="*.txt"), PlaintextResolver()) + + with pytest.raises(SeedReaderError, match="Seed source directory .* does not exist"): + reader.get_column_names() def test_filesystem_seed_reader_on_attach_requires_no_super_and_resets_state(tmp_path: Path) -> None: diff --git a/packages/data-designer-engine/tests/engine/test_compiler.py b/packages/data-designer-engine/tests/engine/test_compiler.py index fb3f9dbc0..81d7c0194 100644 --- a/packages/data-designer-engine/tests/engine/test_compiler.py +++ b/packages/data-designer-engine/tests/engine/test_compiler.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -from unittest.mock import patch +from unittest.mock import Mock, patch import pytest @@ -12,7 +12,7 @@ from data_designer.config.seed_source import HuggingFaceSeedSource from data_designer.engine.compiler import compile_data_designer_config from data_designer.engine.resources.resource_provider import ResourceProvider -from data_designer.engine.resources.seed_reader import SeedReader +from data_designer.engine.resources.seed_reader import SeedReader, SeedReaderConfigError from data_designer.engine.validation import Violation, ViolationLevel, ViolationType @@ -55,6 +55,18 @@ def test_errors_on_seed_column_collisions(resource_provider: ResourceProvider): assert "city" in str(excinfo) +def test_seed_reader_config_errors_are_invalid_config_errors(resource_provider: ResourceProvider): + builder = DataDesignerConfigBuilder() + builder.with_seed_dataset(HuggingFaceSeedSource(path="hf://datasets/test/data.csv")) + resource_provider.seed_reader = Mock(spec=SeedReader) + resource_provider.seed_reader.get_column_names.side_effect = SeedReaderConfigError("missing seed root") + + with pytest.raises(InvalidConfigError, match="missing seed root") as excinfo: + compile_data_designer_config(builder.build(), resource_provider) + + assert isinstance(excinfo.value.__cause__, SeedReaderConfigError) + + def test_validation_errors(resource_provider: ResourceProvider): builder = DataDesignerConfigBuilder() builder.add_column( From be0027ec4e916b67ec8ce6caf49b6a4b1e4a2b35 Mon Sep 17 00:00:00 2001 From: Mike Knepper Date: Mon, 22 Jun 2026 10:00:58 -0500 Subject: [PATCH 2/4] SeedSource cleanup Signed-off-by: Mike Knepper --- .../src/data_designer/config/seed_source.py | 59 ++++--------------- .../tests/config/test_seed_source.py | 53 +++++++++++++++++ 2 files changed, 66 insertions(+), 46 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/seed_source.py b/packages/data-designer-config/src/data_designer/config/seed_source.py index ca1f6b2ba..e5040182d 100644 --- a/packages/data-designer-config/src/data_designer/config/seed_source.py +++ b/packages/data-designer-config/src/data_designer/config/seed_source.py @@ -110,8 +110,6 @@ class FileSystemSeedSource(SeedSource, ABC): directory for matching files. Defaults to ``True``. """ - _runtime_path: str | None = PrivateAttr(default=None) - path: str = Field( ..., description=( @@ -137,32 +135,22 @@ def validate_path(cls, value: str | None) -> str | None: # and inherited validators fire for all subclasses. return _validate_filesystem_seed_source_path(value) - def model_post_init(self, __context: Any) -> None: - # None guard is exercised by AgentRolloutSeedSource (path: str | None) via inheritance. - self._runtime_path = None if self.path is None else _resolve_filesystem_runtime_path(self.path) - - @property - def runtime_path(self) -> str: - if self._runtime_path is None: - self._runtime_path = _resolve_filesystem_runtime_path(self.path) - return self._runtime_path - @field_validator("file_pattern", mode="after") def validate_file_pattern(cls, value: str | None) -> str | None: return _validate_filesystem_seed_source_file_pattern(value) - -class DirectorySeedSource(FileSystemSeedSource): - seed_type: Literal["directory"] = "directory" - - def model_post_init(self, __context: Any) -> None: - self._runtime_path = self.path - @property def runtime_path(self) -> str: + # Path resolution and existence checks are the filesystem provider's job at read + # time, not the config object's. Keeping the raw value here preserves relative + # paths and avoids assuming a local filesystem. return self.path +class DirectorySeedSource(FileSystemSeedSource): + seed_type: Literal["directory"] = "directory" + + class FileContentsSeedSource(FileSystemSeedSource): seed_type: Literal["file_contents"] = "file_contents" @@ -179,13 +167,6 @@ def validate_encoding(cls, value: str) -> str: raise ValueError(f"🛑 Unknown encoding: {value!r}. Use a valid Python codec name.") from error return value - def model_post_init(self, __context: Any) -> None: - self._runtime_path = self.path - - @property - def runtime_path(self) -> str: - return self.path - def _resolve_filesystem_runtime_path(path: str) -> str: return str(Path(path).expanduser().resolve()) @@ -224,16 +205,6 @@ def _validate_filesystem_seed_source_path(value: str | None) -> str | None: return value -def _validate_local_filesystem_seed_source_path(value: str | None) -> str | None: - value = _validate_filesystem_seed_source_path(value) - if value is None: - return None - path = Path(value).expanduser().resolve() - if not path.is_dir(): - raise InvalidFilePathError(f"🛑 Path {path} is not a directory.") - return value - - def _validate_filesystem_seed_source_file_pattern(value: str | None) -> str | None: if value is None: return None @@ -296,10 +267,6 @@ class AgentRolloutSeedSource(FileSystemSeedSource): ), ) - @field_validator("path", mode="after") - def validate_path(cls, value: str | None) -> str | None: - return _validate_local_filesystem_seed_source_path(value) - @model_validator(mode="after") def validate_runtime_path_source(self) -> Self: default_path, _ = get_agent_rollout_format_defaults(self.format) @@ -309,14 +276,14 @@ def validate_runtime_path_source(self) -> Self: @property def runtime_path(self) -> str: - if self._runtime_path is not None: - return self._runtime_path + # Path resolution and existence checks happen in the filesystem provider at read + # time. When no explicit path is given, fall back to the format's default root. + if self.path is not None: + return self.path default_path, _ = get_agent_rollout_format_defaults(self.format) - resolved_path = self.path if self.path is not None else default_path - if resolved_path is None: + if default_path is None: raise ValueError(f"🛑 AgentRolloutSeedSource.path is required for format {self.format.value!r}.") - self._runtime_path = _resolve_filesystem_runtime_path(resolved_path) - return self._runtime_path + return default_path @property def resolved_file_pattern(self) -> str: diff --git a/packages/data-designer-config/tests/config/test_seed_source.py b/packages/data-designer-config/tests/config/test_seed_source.py index d886c9ca3..24a54eb54 100644 --- a/packages/data-designer-config/tests/config/test_seed_source.py +++ b/packages/data-designer-config/tests/config/test_seed_source.py @@ -4,6 +4,7 @@ from __future__ import annotations from pathlib import Path +from typing import Literal import pytest @@ -15,6 +16,7 @@ AgentRolloutSeedSource, DirectorySeedSource, FileContentsSeedSource, + FileSystemSeedSource, LocalFileSeedSource, ) from data_designer.config.seed_source_dataframe import DataFrameSeedSource @@ -224,6 +226,17 @@ def test_filesystem_seed_sources_reject_path_like_file_patterns( source_type(path=str(tmp_path), file_pattern=file_pattern) +def test_filesystem_seed_source_subclass_inherits_runtime_path(tmp_path: Path) -> None: + # Plugin authors subclass FileSystemSeedSource directly; readers rely on + # `source.runtime_path`, so the base must provide it without an override. + class PluginSeedSource(FileSystemSeedSource): + seed_type: Literal["plugin-seed-source"] = "plugin-seed-source" + + source = PluginSeedSource(path=str(tmp_path)) + + assert source.runtime_path == str(tmp_path) + + @pytest.mark.parametrize( ("rollout_format", "file_pattern", "error_message"), [ @@ -268,6 +281,46 @@ def test_agent_rollout_seed_source_requires_explicit_atif_path() -> None: AgentRolloutSeedSource(format=AgentRolloutFormat.ATIF) +def test_agent_rollout_seed_source_defers_directory_existence_validation(tmp_path: Path) -> None: + missing_dir = tmp_path / "does-not-exist" + + source = AgentRolloutSeedSource(path=str(missing_dir), format=AgentRolloutFormat.ATIF) + + assert source.path == str(missing_dir) + assert source.runtime_path == str(missing_dir) + + +def test_agent_rollout_seed_source_preserves_raw_runtime_path_across_cwd_changes( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + initial_root = tmp_path / "initial" + later_root = tmp_path / "later" + (initial_root / "seed-dir").mkdir(parents=True) + later_root.mkdir() + + monkeypatch.chdir(initial_root) + source = AgentRolloutSeedSource(path="seed-dir", format=AgentRolloutFormat.ATIF) + + monkeypatch.chdir(later_root) + + assert source.path == "seed-dir" + assert source.runtime_path == "seed-dir" + assert source.model_dump(mode="json")["path"] == "seed-dir" + + +def test_agent_rollout_seed_source_runtime_path_falls_back_to_format_default( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + monkeypatch.setenv("HOME", str(tmp_path)) + + source = AgentRolloutSeedSource(format=AgentRolloutFormat.CLAUDE_CODE) + + assert source.path is None + assert source.runtime_path == str(tmp_path / ".claude" / "projects") + + def test_agent_rollout_seed_source_uses_default_atif_file_pattern(tmp_path: Path) -> None: trace_dir = tmp_path / "atif" trace_dir.mkdir() From aa83da048c4c8d52e8764ed72af3ef3c49c5add7 Mon Sep 17 00:00:00 2001 From: Mike Knepper Date: Tue, 23 Jun 2026 14:27:58 -0500 Subject: [PATCH 3/4] Add Path/PurePath guard Signed-off-by: Mike Knepper --- .../src/data_designer/engine/resources/seed_reader.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py b/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py index dedf15122..93869a8e7 100644 --- a/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py +++ b/packages/data-designer-engine/src/data_designer/engine/resources/seed_reader.py @@ -697,8 +697,17 @@ def _get_parse_context(self, context: SeedReaderFileSystemContext) -> AgentRollo if self._parse_context is not self._PARSE_CONTEXT_UNSET: return self._parse_context + # Agent rollout handlers operate on the local filesystem directly (root_path.glob, + # root_path / relative_path), so they require a concrete Path rather than the + # PurePath the context type permits for remote providers. + root_path = context.root_path + if not isinstance(root_path, Path): + raise SeedReaderConfigError( + f"🛑 Agent rollout seed readers require a local filesystem, but got non-local root path " + f"{root_path!r} ({type(root_path).__name__})." + ) handler = self.get_format_handler() - self._parse_context = handler.build_parse_context(root_path=context.root_path, recursive=self.source.recursive) + self._parse_context = handler.build_parse_context(root_path=root_path, recursive=self.source.recursive) return self._parse_context From b6dcfdde9ded1608b4f96bdc08e146a44656ee14 Mon Sep 17 00:00:00 2001 From: Mike Knepper Date: Tue, 23 Jun 2026 14:34:08 -0500 Subject: [PATCH 4/4] Fix stale docstring Signed-off-by: Mike Knepper --- .../src/data_designer/config/seed_source.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/data-designer-config/src/data_designer/config/seed_source.py b/packages/data-designer-config/src/data_designer/config/seed_source.py index e5040182d..0e06c82ce 100644 --- a/packages/data-designer-config/src/data_designer/config/seed_source.py +++ b/packages/data-designer-config/src/data_designer/config/seed_source.py @@ -253,8 +253,8 @@ class AgentRolloutSeedSource(FileSystemSeedSource): "Claude Code defaults to ~/.claude/projects, Codex defaults to ~/.codex/sessions, " "Hermes Agent defaults to ~/.hermes/sessions, " "and Pi Coding Agent defaults to ~/.pi/agent/sessions. " - "Relative paths are resolved from the current working directory when the config is loaded, " - "not from the config file location." + "Relative local paths are resolved by the active filesystem provider when the seed is " + "validated or read, not when the config object is constructed." ), )