From 4ba8fd9b8f52bcacab67c8f159abcebcdf7ed4f0 Mon Sep 17 00:00:00 2001 From: "Eric W. Tramel" Date: Wed, 6 May 2026 20:53:02 -0400 Subject: [PATCH 1/4] Add Generalist agent environment plugin --- .github/CODEOWNERS | 1 + catalog/plugins.json | 25 + .../index.md | 78 ++ .../usage.md | 54 ++ docs/plugins/index.md | 11 + .../CODEOWNERS | 3 + .../README.md | 34 + .../docs/index.md | 78 ++ .../docs/usage.md | 54 ++ .../pyproject.toml | 36 + .../__init__.py | 2 + .../config.py | 144 +++ .../impl.py | 858 ++++++++++++++++++ .../plugin.py | 10 + .../tests/test_plugin.py | 212 +++++ pyproject.toml | 2 +- uv.lock | 30 +- zensical.toml | 4 + 18 files changed, 1626 insertions(+), 10 deletions(-) create mode 100644 docs/plugins/data-designer-generalist-agent-env/index.md create mode 100644 docs/plugins/data-designer-generalist-agent-env/usage.md create mode 100644 plugins/data-designer-generalist-agent-env/CODEOWNERS create mode 100644 plugins/data-designer-generalist-agent-env/README.md create mode 100644 plugins/data-designer-generalist-agent-env/docs/index.md create mode 100644 plugins/data-designer-generalist-agent-env/docs/usage.md create mode 100644 plugins/data-designer-generalist-agent-env/pyproject.toml create mode 100644 plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py create mode 100644 plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py create mode 100644 plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py create mode 100644 plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py create mode 100644 plugins/data-designer-generalist-agent-env/tests/test_plugin.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index eb06565..b83428a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,4 +7,5 @@ /.github/ @NVIDIA-NeMo/data_designer_reviewers # Plugins +/plugins/data-designer-generalist-agent-env/ 
eric.tramel@gmail.com /plugins/data-designer-template/ @NVIDIA-NeMo/data_designer_reviewers diff --git a/catalog/plugins.json b/catalog/plugins.json index bab1a70..ead8446 100644 --- a/catalog/plugins.json +++ b/catalog/plugins.json @@ -1,6 +1,31 @@ { "schema_version": 1, "plugins": [ + { + "name": "generalist-agent-env", + "plugin_type": "column-generator", + "description": "Generalist agent environment tuple generator for Data Designer", + "package": { + "name": "data-designer-generalist-agent-env", + "version": "0.1.0", + "path": "plugins/data-designer-generalist-agent-env" + }, + "entry_point": { + "group": "data_designer.plugins", + "name": "generalist-agent-env", + "value": "data_designer_generalist_agent_env.plugin:plugin" + }, + "compatibility": { + "python": { + "specifier": ">=3.10" + }, + "data_designer": { + "requirement": "data-designer>=0.5.9", + "specifier": ">=0.5.9", + "marker": null + } + } + }, { "name": "text-transform", "plugin_type": "column-generator", diff --git a/docs/plugins/data-designer-generalist-agent-env/index.md b/docs/plugins/data-designer-generalist-agent-env/index.md new file mode 100644 index 0000000..23137c9 --- /dev/null +++ b/docs/plugins/data-designer-generalist-agent-env/index.md @@ -0,0 +1,78 @@ +# data-designer-generalist-agent-env + +The `data-designer-generalist-agent-env` plugin adds a `generalist-agent-env` +column type for creating Generalist-style agent environment tuples inspired by +the DeepSeek-V3.2 automatic environment synthesis workflow. + +For each seed row, the plugin builds a row-local sandbox database, exposes +task-specific tool functions, synthesizes a constrained task, emits a solution +function that only calls tools and performs local logic, and emits a verifier +function that checks candidate answers against the database. 
+ +## Installation + +```bash +uv add "data-designer>=0.5.9" data-designer-generalist-agent-env +``` + +## Column type + +Use the `generalist-agent-env` column type when a dataset needs structured +`` records for agent training or evaluation. + +| Field | Required | Description | +| --- | --- | --- | +| `name` | Yes | Output column name. | +| `task_category_column` | Yes | Existing column containing a task category such as `planning a travel itinerary`. | +| `context_columns` | No | Existing columns copied into the synthesized sandbox database context. | +| `difficulty` | No | Final task difficulty: `simple`, `medium`, or `hard`; defaults to `hard`. | +| `database_size` | No | Number of sandbox records to synthesize per row; defaults to `8`. | +| `required_tag` | No | Optional tag that the valid answer must contain. | +| `max_cost` | No | Optional maximum cost constraint. Unsatisfiable values are repaired upward. | +| `min_score` | No | Optional minimum score constraint. Unsatisfiable values are repaired downward. | + +## Usage + +```python +import pandas as pd +from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.seed_source_dataframe import DataFrameSeedSource + +seed_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["compare candidate plans by score, cost, and family suitability"], + } +) + +builder = DataDesignerConfigBuilder() +builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) +builder.add_column( + name="agent_env", + column_type="generalist-agent-env", + task_category_column="category", + context_columns=["constraints"], + required_tag="family", +) +``` + +The generated `agent_env` value is a dictionary with these top-level keys: + +| Key | Description | +| --- | --- | +| `environment` | Sandbox metadata, row-local database, schema, and source context. | +| `tools` | Synthesized tool descriptors and Python function sources. 
| +| `tool_module_source` | Executable Python source defining the hidden database and selected tools. | +| `task` | Prompt, difficulty, constraints, and answer schema. | +| `solution` | Python `solve(tools)` source restricted to tool calls and local logic. | +| `verifier` | Python `verify(answer, database)` source and reference validation status. | +| `reference_answer` | The generated solution output that the verifier accepts. | +| `task_iterations` | Simple-to-final task, solution, verifier, and augmentation artifacts. | +| `synthesis_trace` | Environment construction, task synthesis, tool augmentation, solution, and verification events. | + +## Behavior Notes + +The plugin is deterministic and does not call the Internet. It records `bash` +and `search` as base sandbox tools and uses the seed row to synthesize the +sandbox database locally. Downstream workflows can replace or augment that +database with retrieved records before using the generated task and verifier. diff --git a/docs/plugins/data-designer-generalist-agent-env/usage.md b/docs/plugins/data-designer-generalist-agent-env/usage.md new file mode 100644 index 0000000..f46421d --- /dev/null +++ b/docs/plugins/data-designer-generalist-agent-env/usage.md @@ -0,0 +1,54 @@ +# Usage + +This example creates one Generalist agent environment tuple from a trip-planning +category. The same pattern works for other task categories where searching the +candidate space is harder than verifying a proposed answer. 
+ +```python +import pandas as pd +from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.seed_source_dataframe import DataFrameSeedSource +from data_designer.interface.data_designer import DataDesigner + +seed_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["family-friendly museums, moderate budget, reliable transport"], + } +) + +builder = DataDesignerConfigBuilder() +builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) +builder.add_column( + name="agent_env", + column_type="generalist-agent-env", + task_category_column="category", + context_columns=["constraints"], + difficulty="hard", + required_tag="family", +) + +result = DataDesigner(artifact_path="artifacts").preview(builder, num_records=1) +environment_tuple = result.dataset.loc[0, "agent_env"] +``` + +The solution can be smoke-tested by executing the generated source: + +```python +tool_namespace = {} +exec(environment_tuple["tool_module_source"], tool_namespace) +tools = {tool["name"]: tool_namespace[tool["name"]] for tool in environment_tuple["tools"]} + +solution_namespace = {} +exec(environment_tuple["solution"]["source"], solution_namespace) +answer = solution_namespace["solve"](tools) + +verifier_namespace = {} +exec(environment_tuple["verifier"]["source"], verifier_namespace) +assert verifier_namespace["verify"](answer, environment_tuple["environment"]["database"]) +``` + +The output task is intentionally search-like: the solving agent must inspect, +filter, and rank records through the tool interface. The verifier remains +straightforward because it checks fixed constraints and a deterministic +tie-break order directly against the database. diff --git a/docs/plugins/index.md b/docs/plugins/index.md index 4e54e2e..6349dd5 100644 --- a/docs/plugins/index.md +++ b/docs/plugins/index.md @@ -5,6 +5,17 @@ Browse available Data Designer plugins by what they add to your data generation workflow.
+ + + data-designer-generalist-agent-env + v0.1.0 + + Generalist agent environment tuple generator for Data Designer + + Column types + generalist-agent-env + + data-designer-template diff --git a/plugins/data-designer-generalist-agent-env/CODEOWNERS b/plugins/data-designer-generalist-agent-env/CODEOWNERS new file mode 100644 index 0000000..013e51f --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/CODEOWNERS @@ -0,0 +1,3 @@ +# Owner(s) of this plugin — used to generate the root CODEOWNERS file. +# GitHub accepts @username, @org/team, or email format. +* eric.tramel@gmail.com diff --git a/plugins/data-designer-generalist-agent-env/README.md b/plugins/data-designer-generalist-agent-env/README.md new file mode 100644 index 0000000..76115dc --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/README.md @@ -0,0 +1,34 @@ +# data-designer-generalist-agent-env + +Generate Generalist-style agent environment tuples from seed task categories. +Each output value contains a sandbox database, synthesized task-specific tool +functions, a task prompt, a tool-only solution function, and a verifier function. + +## Installation + +```bash +uv add "data-designer>=0.5.9" data-designer-generalist-agent-env +``` + +## Usage + +Once installed, the `generalist-agent-env` column type is automatically discovered by +[NeMo Data Designer](https://github.com/NVIDIA-NeMo/DataDesigner). + +Configure the column with a task category column and optional context columns: + +```python +builder.add_column( + name="agent_env", + column_type="generalist-agent-env", + task_category_column="category", + context_columns=["constraints"], + difficulty="hard", +) +``` + +For the full plugin authoring guide, see the +[main repository docs](https://nvidia-nemo.github.io/DataDesignerPlugins/authoring/). + +Plugin documentation for the repository site lives in this package's `docs/` +directory. 
diff --git a/plugins/data-designer-generalist-agent-env/docs/index.md b/plugins/data-designer-generalist-agent-env/docs/index.md new file mode 100644 index 0000000..23137c9 --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/docs/index.md @@ -0,0 +1,78 @@ +# data-designer-generalist-agent-env + +The `data-designer-generalist-agent-env` plugin adds a `generalist-agent-env` +column type for creating Generalist-style agent environment tuples inspired by +the DeepSeek-V3.2 automatic environment synthesis workflow. + +For each seed row, the plugin builds a row-local sandbox database, exposes +task-specific tool functions, synthesizes a constrained task, emits a solution +function that only calls tools and performs local logic, and emits a verifier +function that checks candidate answers against the database. + +## Installation + +```bash +uv add "data-designer>=0.5.9" data-designer-generalist-agent-env +``` + +## Column type + +Use the `generalist-agent-env` column type when a dataset needs structured +`` records for agent training or evaluation. + +| Field | Required | Description | +| --- | --- | --- | +| `name` | Yes | Output column name. | +| `task_category_column` | Yes | Existing column containing a task category such as `planning a travel itinerary`. | +| `context_columns` | No | Existing columns copied into the synthesized sandbox database context. | +| `difficulty` | No | Final task difficulty: `simple`, `medium`, or `hard`; defaults to `hard`. | +| `database_size` | No | Number of sandbox records to synthesize per row; defaults to `8`. | +| `required_tag` | No | Optional tag that the valid answer must contain. | +| `max_cost` | No | Optional maximum cost constraint. Unsatisfiable values are repaired upward. | +| `min_score` | No | Optional minimum score constraint. Unsatisfiable values are repaired downward. 
| + +## Usage + +```python +import pandas as pd +from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.seed_source_dataframe import DataFrameSeedSource + +seed_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["compare candidate plans by score, cost, and family suitability"], + } +) + +builder = DataDesignerConfigBuilder() +builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) +builder.add_column( + name="agent_env", + column_type="generalist-agent-env", + task_category_column="category", + context_columns=["constraints"], + required_tag="family", +) +``` + +The generated `agent_env` value is a dictionary with these top-level keys: + +| Key | Description | +| --- | --- | +| `environment` | Sandbox metadata, row-local database, schema, and source context. | +| `tools` | Synthesized tool descriptors and Python function sources. | +| `tool_module_source` | Executable Python source defining the hidden database and selected tools. | +| `task` | Prompt, difficulty, constraints, and answer schema. | +| `solution` | Python `solve(tools)` source restricted to tool calls and local logic. | +| `verifier` | Python `verify(answer, database)` source and reference validation status. | +| `reference_answer` | The generated solution output that the verifier accepts. | +| `task_iterations` | Simple-to-final task, solution, verifier, and augmentation artifacts. | +| `synthesis_trace` | Environment construction, task synthesis, tool augmentation, solution, and verification events. | + +## Behavior Notes + +The plugin is deterministic and does not call the Internet. It records `bash` +and `search` as base sandbox tools and uses the seed row to synthesize the +sandbox database locally. Downstream workflows can replace or augment that +database with retrieved records before using the generated task and verifier. 
diff --git a/plugins/data-designer-generalist-agent-env/docs/usage.md b/plugins/data-designer-generalist-agent-env/docs/usage.md new file mode 100644 index 0000000..f46421d --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/docs/usage.md @@ -0,0 +1,54 @@ +# Usage + +This example creates one Generalist agent environment tuple from a trip-planning +category. The same pattern works for other task categories where searching the +candidate space is harder than verifying a proposed answer. + +```python +import pandas as pd +from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.seed_source_dataframe import DataFrameSeedSource +from data_designer.interface.data_designer import DataDesigner + +seed_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["family-friendly museums, moderate budget, reliable transport"], + } +) + +builder = DataDesignerConfigBuilder() +builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) +builder.add_column( + name="agent_env", + column_type="generalist-agent-env", + task_category_column="category", + context_columns=["constraints"], + difficulty="hard", + required_tag="family", +) + +result = DataDesigner(artifact_path="artifacts").preview(builder, num_records=1) +environment_tuple = result.dataset.loc[0, "agent_env"] +``` + +The solution can be smoke-tested by executing the generated source: + +```python +tool_namespace = {} +exec(environment_tuple["tool_module_source"], tool_namespace) +tools = {tool["name"]: tool_namespace[tool["name"]] for tool in environment_tuple["tools"]} + +solution_namespace = {} +exec(environment_tuple["solution"]["source"], solution_namespace) +answer = solution_namespace["solve"](tools) + +verifier_namespace = {} +exec(environment_tuple["verifier"]["source"], verifier_namespace) +assert verifier_namespace["verify"](answer, environment_tuple["environment"]["database"]) +``` + +The output task is intentionally 
search-like: the solving agent must inspect, +filter, and rank records through the tool interface. The verifier remains +straightforward because it checks fixed constraints and a deterministic +tie-break order directly against the database. diff --git a/plugins/data-designer-generalist-agent-env/pyproject.toml b/plugins/data-designer-generalist-agent-env/pyproject.toml new file mode 100644 index 0000000..14bf78a --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/pyproject.toml @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[project] +name = "data-designer-generalist-agent-env" +version = "0.1.0" +description = "Generalist agent environment tuple generator for Data Designer" +requires-python = ">=3.10" +dependencies = [ + "data-designer>=0.5.9", +] +license = "Apache-2.0" +readme = "README.md" +authors = [ + {name = "NVIDIA Corporation"}, +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Programming Language :: Python :: 3", +] + +[project.entry-points."data_designer.plugins"] +generalist-agent-env = "data_designer_generalist_agent_env.plugin:plugin" + +[project.urls] +Repository = "https://github.com/NVIDIA-NeMo/DataDesignerPlugins" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/data_designer_generalist_agent_env"] + +[tool.ruff] +extend = "../../pyproject.toml" diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py new file mode 100644 index 0000000..52a7a9d --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py @@ -0,0 +1,2 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py new file mode 100644 index 0000000..214349d --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py @@ -0,0 +1,144 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import Literal + +from data_designer.config.base import SingleColumnConfig +from pydantic import Field, field_validator, model_validator +from typing_extensions import Self + +Difficulty = Literal["simple", "medium", "hard"] + + +class GeneralistAgentEnvColumnConfig(SingleColumnConfig): + """Configuration for synthesizing Generalist agent environment tuples. + + The generator consumes a task category column and optional context columns, + then writes one structured environment/task/verifier tuple per input row. 
+ """ + + column_type: Literal["generalist-agent-env"] = "generalist-agent-env" + + task_category_column: str = Field( + description="Input column containing the task category, such as 'travel itinerary planning'.", + ) + context_columns: list[str] = Field( + default_factory=list, + description="Optional seed columns copied into the synthesized sandbox database context.", + ) + difficulty: Difficulty = Field( + default="hard", + description="Final task difficulty to synthesize after the simple-to-hard iteration trace.", + ) + database_size: int = Field( + default=8, + ge=3, + le=30, + description="Number of records to synthesize into the sandbox database for each row.", + ) + required_tag: str | None = Field( + default=None, + description="Optional tag that every valid solution candidate must contain.", + ) + max_cost: int | None = Field( + default=None, + ge=1, + description="Optional maximum cost constraint for the final task; repaired upward if it makes the task unsat.", + ) + min_score: int | None = Field( + default=None, + ge=0, + le=100, + description="Optional minimum score constraint for the final task; repaired downward if it makes the task unsat.", + ) + + @staticmethod + def get_column_emoji() -> str: + return "🧰" + + @field_validator("task_category_column") + @classmethod + def validate_task_category_column(cls, value: str) -> str: + """Validate the task category source column name. + + Args: + value: Candidate column name. + + Returns: + The stripped column name. + + Raises: + ValueError: If the column name is empty. + """ + value = value.strip() + if not value: + raise ValueError("task_category_column must not be empty") + return value + + @field_validator("context_columns") + @classmethod + def validate_context_columns(cls, value: list[str]) -> list[str]: + """Validate and de-duplicate context column names. + + Args: + value: Candidate context column names. + + Returns: + Context column names with duplicates removed while preserving order. 
+ + Raises: + ValueError: If any context column name is empty. + """ + columns: list[str] = [] + for column in value: + column = column.strip() + if not column: + raise ValueError("context_columns must not contain empty column names") + if column not in columns: + columns.append(column) + return columns + + @field_validator("required_tag") + @classmethod + def validate_required_tag(cls, value: str | None) -> str | None: + """Normalize the optional required tag. + + Args: + value: Candidate tag value. + + Returns: + A lower-cased tag, or ``None`` when unset. + + Raises: + ValueError: If the tag contains only whitespace. + """ + if value is None: + return None + value = value.strip().lower() + if not value: + raise ValueError("required_tag must not be empty when provided") + return value + + @model_validator(mode="after") + def validate_distinct_columns(self) -> Self: + """Validate cross-field column references. + + Returns: + This config instance. + + Raises: + ValueError: If the category column is repeated as context. + """ + if self.task_category_column in self.context_columns: + raise ValueError("context_columns must not repeat task_category_column") + return self + + @property + def required_columns(self) -> list[str]: + return [self.task_category_column, *self.context_columns] + + @property + def side_effect_columns(self) -> list[str]: + return [] diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py new file mode 100644 index 0000000..c5dd423 --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py @@ -0,0 +1,858 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import hashlib +import json +import math +import re +import textwrap +from pprint import pformat +from typing import TYPE_CHECKING, Any + +from data_designer.engine.column_generators.generators.base import ColumnGeneratorFullColumn + +from data_designer_generalist_agent_env.config import Difficulty, GeneralistAgentEnvColumnConfig + +if TYPE_CHECKING: + import pandas as pd + +BASE_SANDBOX_TOOLS = ["bash", "search"] +BASE_TAGS = [ + "budget", + "reliable", + "fast", + "verified", + "flexible", + "local", + "safe", + "ranked", +] +DIFFICULTY_ORDER: list[Difficulty] = ["simple", "medium", "hard"] + +DATABASE_SCHEMA = { + "record_id": "Stable row-local identifier.", + "name": "Human-readable option name.", + "category": "Task category supplied by the seed row.", + "summary": "Short synthesized description for search.", + "cost": "Integer cost proxy; lower is better.", + "duration": "Integer duration proxy.", + "score": "Integer quality score from 55 to 100; higher is better.", + "tags": "Searchable task-specific labels.", + "source_values": "Context columns copied from the seed row.", +} + +TOOL_FUNCTION_SOURCES = { + "list_records": ''' +def list_records(): + """Return every record in the sandbox database.""" + return [dict(record) for record in DATABASE] +''', + "search_records": ''' +def search_records(query="", max_results=10): + """Search database records by name, summary, category, or tag.""" + needle = str(query or "").casefold() + limit = max(0, int(max_results)) + matches = [] + for record in DATABASE: + haystack = " ".join( + [ + str(record.get("name", "")), + str(record.get("summary", "")), + str(record.get("category", "")), + " ".join(str(tag) for tag in record.get("tags", [])), + ], + ).casefold() + if not needle or needle in haystack: + matches.append(dict(record)) + return matches[:limit] +''', + "get_record": ''' +def get_record(record_id): + """Return one record by id, or 
None when the id is unknown.""" + for record in DATABASE: + if str(record.get("record_id")) == str(record_id): + return dict(record) + return None +''', + "filter_records": ''' +def filter_records(max_cost=None, min_score=None, required_tag=None): + """Filter records by cost, score, and tag constraints.""" + matches = [] + for record in DATABASE: + if max_cost is not None and int(record["cost"]) > int(max_cost): + continue + if min_score is not None and int(record["score"]) < int(min_score): + continue + if required_tag is not None and str(required_tag) not in record.get("tags", []): + continue + matches.append(dict(record)) + return matches +''', + "rank_records": ''' +def rank_records(records=None, metric="score", descending=True): + """Rank supplied records, or all database records, by a numeric metric.""" + source = DATABASE if records is None else records + return sorted( + [dict(record) for record in source], + key=lambda record: int(record.get(metric, 0)), + reverse=bool(descending), + ) +''', +} + +TOOL_DESCRIPTIONS = { + "list_records": "Inspect all rows in the hidden sandbox database.", + "search_records": "Retrieve category-relevant records through a search-style interface.", + "get_record": "Fetch one database record by identifier.", + "filter_records": "Apply verifier-aligned constraints without exposing the database directly.", + "rank_records": "Rank candidate records for the final combinatorial selection step.", +} + + +def normalize_cell(value: object) -> str: + """Normalize one pandas cell into a stable text value. + + Args: + value: Cell value from a seed row. + + Returns: + A stripped string, or an empty string for null-like values. 
+ """ + if value is None: + return "" + if isinstance(value, float) and math.isnan(value): + return "" + try: + if value != value: + return "" + except (TypeError, ValueError): + pass + return str(value).strip() + + +def slugify(value: str, fallback: str) -> str: + """Convert text into a stable lowercase identifier fragment. + + Args: + value: Input text. + fallback: Value to use when no identifier characters remain. + + Returns: + A slug containing lowercase letters, digits, and hyphens. + """ + slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-") + return slug or fallback + + +def stable_int(seed: str, modulo: int) -> int: + """Hash text into a deterministic integer range. + + Args: + seed: Hash seed. + modulo: Exclusive upper bound. + + Returns: + A deterministic integer in ``[0, modulo)``. + """ + digest = hashlib.sha256(seed.encode("utf-8")).hexdigest() + return int(digest[:12], 16) % modulo + + +def unique_values(values: list[str]) -> list[str]: + """Return values with duplicates removed while preserving order. + + Args: + values: Candidate values. + + Returns: + De-duplicated values. + """ + result: list[str] = [] + for value in values: + if value and value not in result: + result.append(value) + return result + + +def context_tags(category: str, context_values: dict[str, str], required_tag: str | None) -> list[str]: + """Build a task-specific tag vocabulary. + + Args: + category: Seed task category. + context_values: Row context values. + required_tag: Optional tag that must be present in the database. + + Returns: + A non-empty tag list used to populate synthesized records. + """ + seed_text = " ".join([category, *context_values.values()]) + words = [word for word in re.findall(r"[a-z0-9]+", seed_text.lower()) if len(word) > 3] + tags = unique_values([required_tag or "", *words[:6], *BASE_TAGS]) + return tags[:12] + + +def build_context_summary(context_values: dict[str, str]) -> str: + """Summarize row context for record descriptions. 
+ + Args: + context_values: Context columns extracted from the seed row. + + Returns: + Compact text suitable for generated summaries. + """ + if not context_values: + return "seed category only" + return "; ".join(f"{name}: {value}" for name, value in context_values.items() if value) or "empty context" + + +def build_database( + category: str, + context_values: dict[str, str], + database_size: int, + required_tag: str | None, +) -> list[dict[str, Any]]: + """Synthesize a row-local sandbox database. + + Args: + category: Seed task category. + context_values: Optional context copied from the input row. + database_size: Number of database records to create. + required_tag: Optional tag that must be inserted into at least one record. + + Returns: + JSON-compatible database records. + """ + category_slug = slugify(category, "task") + seed_context = json.dumps(context_values, sort_keys=True) + tags = context_tags(category, context_values, required_tag) + context_summary = build_context_summary(context_values) + records: list[dict[str, Any]] = [] + + for position in range(database_size): + record_seed = f"{category}|{seed_context}|{position}" + cost = 80 + stable_int(f"{record_seed}|cost", 920) + duration = 1 + stable_int(f"{record_seed}|duration", 14) + score = 55 + stable_int(f"{record_seed}|score", 46) + tag_start = stable_int(f"{record_seed}|tags", len(tags)) + record_tags = [tags[(tag_start + offset) % len(tags)] for offset in range(min(3, len(tags)))] + if required_tag and position == 0 and required_tag not in record_tags: + record_tags[0] = required_tag + name = f"{category.title()} Option {position + 1}" + records.append( + { + "record_id": f"{category_slug}-{position + 1:03d}", + "name": name, + "category": category, + "summary": f"{name} synthesized from {context_summary}.", + "cost": cost, + "duration": duration, + "score": score, + "tags": unique_values(record_tags), + "source_values": dict(context_values), + } + ) + + return records + + +def 
record_matches_constraints(record: dict[str, Any], constraints: dict[str, Any]) -> bool: + """Return whether a record satisfies task constraints. + + Args: + record: Database record. + constraints: Task constraints. + + Returns: + ``True`` when the record is eligible. + """ + required_tag = constraints.get("required_tag") + return ( + int(record["cost"]) <= int(constraints["max_cost"]) + and int(record["score"]) >= int(constraints["min_score"]) + and (required_tag is None or str(required_tag) in record.get("tags", [])) + ) + + +def eligible_records(database: list[dict[str, Any]], constraints: dict[str, Any]) -> list[dict[str, Any]]: + """Filter records that satisfy task constraints. + + Args: + database: Sandbox database records. + constraints: Task constraints. + + Returns: + Eligible records. + """ + return [record for record in database if record_matches_constraints(record, constraints)] + + +def select_best_record(records: list[dict[str, Any]]) -> dict[str, Any] | None: + """Select the optimal answer under the verifier ordering. + + Args: + records: Candidate records. + + Returns: + The best record, or ``None`` when no candidates exist. + """ + if not records: + return None + return sorted(records, key=lambda record: (-int(record["score"]), int(record["cost"]), str(record["record_id"])))[0] + + +def default_constraints( + database: list[dict[str, Any]], + config: GeneralistAgentEnvColumnConfig, + difficulty: Difficulty | None = None, +) -> dict[str, Any]: + """Create feasible default constraints for the requested difficulty. + + Args: + database: Sandbox database records. + config: Column configuration. + difficulty: Difficulty to synthesize; defaults to the configured final difficulty. + + Returns: + Constraint values and repair notes. 
+ """ + difficulty = difficulty or config.difficulty + required_tag = config.required_tag + target_pool = [record for record in database if required_tag is None or required_tag in record["tags"]] + target = select_best_record(target_pool) or select_best_record(database) + if target is None: + msg = "database must contain at least one record" + raise ValueError(msg) + + if required_tag is None and difficulty in ("medium", "hard"): + required_tag = str(target["tags"][0]) + + if difficulty == "simple": + default_max_cost = max(int(record["cost"]) for record in database) + default_min_score = min(int(record["score"]) for record in database) + elif difficulty == "medium": + default_max_cost = int(target["cost"]) + 120 + default_min_score = max(0, int(target["score"]) - 12) + else: + default_max_cost = int(target["cost"]) + 40 + default_min_score = max(0, int(target["score"]) - 4) + + constraints = { + "max_cost": config.max_cost if config.max_cost is not None else default_max_cost, + "min_score": config.min_score if config.min_score is not None else default_min_score, + "required_tag": required_tag, + "repair_notes": [], + } + return repair_constraints(database, constraints) + + +def repair_constraints(database: list[dict[str, Any]], constraints: dict[str, Any]) -> dict[str, Any]: + """Repair constraints that would otherwise make the task unsatisfiable. + + Args: + database: Sandbox database records. + constraints: Initial task constraints. + + Returns: + Feasible constraints plus repair notes. 
+ """ + if eligible_records(database, constraints): + return constraints + + required_tag = constraints.get("required_tag") + target_pool = [record for record in database if required_tag is None or required_tag in record["tags"]] + target = select_best_record(target_pool) or select_best_record(database) + if target is None: + return constraints + + if required_tag is not None and required_tag not in target["tags"]: + constraints["required_tag"] = target["tags"][0] + constraints["repair_notes"].append("required_tag changed to a tag present in the database") + + if int(target["cost"]) > int(constraints["max_cost"]): + constraints["max_cost"] = int(target["cost"]) + constraints["repair_notes"].append("max_cost increased to keep at least one valid candidate") + + if int(target["score"]) < int(constraints["min_score"]): + constraints["min_score"] = int(target["score"]) + constraints["repair_notes"].append("min_score decreased to keep at least one valid candidate") + + return constraints + + +def selected_tool_names(difficulty: Difficulty) -> list[str]: + """Select the synthesized toolset for a difficulty level. + + Args: + difficulty: Final task difficulty. + + Returns: + Tool names to expose to the solution function. + """ + tool_names = ["list_records", "search_records", "get_record"] + if difficulty in ("medium", "hard"): + tool_names.append("filter_records") + if difficulty == "hard": + tool_names.append("rank_records") + return tool_names + + +def build_tool_specs(tool_names: list[str]) -> list[dict[str, str]]: + """Build tool metadata and function source snippets. + + Args: + tool_names: Selected tool names. + + Returns: + Tool descriptors for the output tuple. 
+ """ + return [ + { + "name": tool_name, + "description": TOOL_DESCRIPTIONS[tool_name], + "source": textwrap.dedent(TOOL_FUNCTION_SOURCES[tool_name]).strip(), + } + for tool_name in tool_names + ] + + +def build_tool_module_source(database: list[dict[str, Any]], tool_names: list[str]) -> str: + """Build executable Python source for the synthesized tool module. + + Args: + database: Hidden sandbox database. + tool_names: Selected tool names. + + Returns: + Python module source defining ``DATABASE`` and tool functions. + """ + parts = [f"DATABASE = {pformat(database, sort_dicts=False, width=120)}"] + parts.extend(textwrap.dedent(TOOL_FUNCTION_SOURCES[tool_name]).strip() for tool_name in tool_names) + return "\n\n".join(parts) + "\n" + + +def build_task_prompt(category: str, difficulty: Difficulty, constraints: dict[str, Any]) -> str: + """Create the task prompt presented to a solving agent. + + Args: + category: Seed task category. + difficulty: Final task difficulty. + constraints: Task constraints. + + Returns: + Natural language task prompt. + """ + clauses = [ + f"Use the synthesized tools to solve this {difficulty} {category!r} task.", + "Return the record_id for the eligible database record with the highest score.", + f"Only consider records with cost <= {constraints['max_cost']} and score >= {constraints['min_score']}.", + ] + if constraints.get("required_tag") is not None: + clauses.append(f"The record must include the tag {constraints['required_tag']!r}.") + clauses.append("Break ties by lower cost, then lexicographic record_id.") + return " ".join(clauses) + + +def build_reference_answer(database: list[dict[str, Any]], constraints: dict[str, Any]) -> dict[str, Any]: + """Compute the verifier's expected answer. + + Args: + database: Sandbox database records. + constraints: Task constraints. + + Returns: + JSON-compatible answer object. 
+ """ + best = select_best_record(eligible_records(database, constraints)) + if best is None: + return {"record_id": None, "reason": "no eligible records"} + return { + "record_id": best["record_id"], + "score": best["score"], + "cost": best["cost"], + "tags": list(best["tags"]), + } + + +def verify_answer(answer: dict[str, Any], database: list[dict[str, Any]], constraints: dict[str, Any]) -> bool: + """Verify an answer against the database and constraints. + + Args: + answer: Candidate answer. + database: Sandbox database records. + constraints: Task constraints. + + Returns: + ``True`` when the answer is exactly the verifier-optimal record. + """ + if not isinstance(answer, dict): + return False + best = select_best_record(eligible_records(database, constraints)) + if best is None: + return answer.get("record_id") is None + return ( + answer.get("record_id") == best["record_id"] + and int(answer.get("score", -1)) == int(best["score"]) + and int(answer.get("cost", -1)) == int(best["cost"]) + ) + + +def build_solution_source(constraints: dict[str, Any], difficulty: Difficulty) -> str: + """Build a tool-only Python solution function. + + Args: + constraints: Task constraints. + difficulty: Final task difficulty. + + Returns: + Python source defining ``solve(tools)``. 
+ """ + required_tag = repr(constraints.get("required_tag")) + lines = [ + "def solve(tools):", + ' """Solve the task using only synthesized tool functions and local logic."""', + ] + + if difficulty == "simple": + lines.extend( + [ + " candidates = []", + ' for record in tools["list_records"]():', + f' if int(record["cost"]) > {constraints["max_cost"]}:', + " continue", + f' if int(record["score"]) < {constraints["min_score"]}:', + " continue", + f' if {required_tag} is not None and {required_tag} not in record.get("tags", []):', + " continue", + " candidates.append(record)", + ] + ) + else: + lines.extend( + [ + ' candidates = tools["filter_records"](', + f" max_cost={constraints['max_cost']},", + f" min_score={constraints['min_score']},", + f" required_tag={required_tag},", + " )", + ] + ) + + lines.extend( + [ + " if not candidates:", + ' return {"record_id": None, "reason": "no eligible records"}', + ] + ) + if difficulty == "hard": + lines.extend( + [ + ' ranked = tools["rank_records"](candidates, metric="score", descending=True)', + ' ranked = sorted(ranked, key=lambda record: (-int(record["score"]), int(record["cost"]), str(record["record_id"])))', + ] + ) + else: + lines.append( + ' ranked = sorted(candidates, key=lambda record: (-int(record["score"]), int(record["cost"]), str(record["record_id"])))' + ) + + lines.extend( + [ + " best = ranked[0]", + " return {", + ' "record_id": best["record_id"],', + ' "score": best["score"],', + ' "cost": best["cost"],', + ' "tags": list(best.get("tags", [])),', + " }", + ] + ) + return "\n".join(lines) + + +def build_verifier_source(constraints: dict[str, Any]) -> str: + """Build a Python verifier function for the synthesized task. + + Args: + constraints: Task constraints. + + Returns: + Python source defining ``verify(answer, database)``. 
+ """ + verifier_constraints = { + "max_cost": constraints["max_cost"], + "min_score": constraints["min_score"], + "required_tag": constraints.get("required_tag"), + } + return textwrap.dedent( + f''' + CONSTRAINTS = {pformat(verifier_constraints, sort_dicts=False, width=120)} + + + def verify(answer, database): + """Return True when answer satisfies the task and is verifier-optimal.""" + if not isinstance(answer, dict): + return False + + eligible = [] + for record in database: + if int(record["cost"]) > int(CONSTRAINTS["max_cost"]): + continue + if int(record["score"]) < int(CONSTRAINTS["min_score"]): + continue + required_tag = CONSTRAINTS.get("required_tag") + if required_tag is not None and str(required_tag) not in record.get("tags", []): + continue + eligible.append(record) + + if not eligible: + return answer.get("record_id") is None + + best = sorted( + eligible, + key=lambda record: (-int(record["score"]), int(record["cost"]), str(record["record_id"])), + )[0] + return ( + answer.get("record_id") == best["record_id"] + and int(answer.get("score", -1)) == int(best["score"]) + and int(answer.get("cost", -1)) == int(best["cost"]) + ) + ''' + ).strip() + + +def build_task_iteration( + category: str, + database: list[dict[str, Any]], + config: GeneralistAgentEnvColumnConfig, + difficulty: Difficulty, +) -> dict[str, Any]: + """Build one synthesized task, solution, and verifier iteration. + + Args: + category: Seed task category. + database: Sandbox database records. + config: Column configuration. + difficulty: Difficulty level for this iteration. + + Returns: + JSON-compatible iteration artifact. 
+ """ + constraints = default_constraints(database, config, difficulty) + answer = build_reference_answer(database, constraints) + verified = verify_answer(answer, database, constraints) + return { + "difficulty": difficulty, + "tool_names": selected_tool_names(difficulty), + "task_prompt": build_task_prompt(category, difficulty, constraints), + "constraints": constraints, + "solution_source": build_solution_source(constraints, difficulty), + "verifier_source": build_verifier_source(constraints), + "reference_answer": answer, + "reference_solution_passed": verified, + "augmentation_required": difficulty in ("medium", "hard"), + } + + +def build_task_iterations( + category: str, + database: list[dict[str, Any]], + config: GeneralistAgentEnvColumnConfig, +) -> list[dict[str, Any]]: + """Build the simple-to-final task synthesis iterations. + + Args: + category: Seed task category. + database: Sandbox database records. + config: Column configuration. + + Returns: + Ordered task iteration artifacts. + """ + return [ + build_task_iteration(category, database, config, difficulty) + for difficulty in difficulty_trace(config.difficulty) + ] + + +def difficulty_trace(final_difficulty: Difficulty) -> list[Difficulty]: + """List difficulty levels synthesized before the final task. + + Args: + final_difficulty: Requested final difficulty. + + Returns: + Ordered difficulty names through the final level. + """ + return DIFFICULTY_ORDER[: DIFFICULTY_ORDER.index(final_difficulty) + 1] + + +def build_synthesis_trace( + category: str, + difficulty: Difficulty, + tool_names: list[str], + constraints: dict[str, Any], + verified: bool, +) -> list[dict[str, Any]]: + """Describe the Generalist-style synthesis workflow for one row. + + Args: + category: Seed task category. + difficulty: Final task difficulty. + tool_names: Synthesized tool names. + constraints: Final task constraints. + verified: Whether the generated reference answer passes verification. 
+
+    Returns:
+        Ordered workflow events.
+    """
+    trace: list[dict[str, Any]] = [
+        {
+            "stage": "environment_and_toolset_construction",
+            "category": category,
+            "sandbox_tools": list(BASE_SANDBOX_TOOLS),
+            "database_created": True,
+        }
+    ]
+    for level in difficulty_trace(difficulty):
+        trace.append(
+            {
+                "stage": "task_synthesis",
+                "difficulty": level,
+                "goal": "hard to solve through search, easy to verify by deterministic constraints",
+            }
+        )
+        if level in ("medium", "hard"):
+            trace.append(
+                {
+                    "stage": "toolset_augmentation",
+                    "difficulty": level,
+                    "available_tools": selected_tool_names(level),
+                }
+            )
+    trace.append(
+        {
+            "stage": "solution_generation",
+            "solution_restriction": "solution source calls synthesized tools and uses local logical computation only",
+            "final_tools": tool_names,
+        }
+    )
+    trace.append(
+        {
+            "stage": "verification",
+            "constraints": {key: value for key, value in constraints.items() if key != "repair_notes"},
+            "reference_solution_passed": verified,
+        }
+    )
+    return trace
+
+
+def build_environment_tuple(
+    category: str,
+    context_values: dict[str, str],
+    config: GeneralistAgentEnvColumnConfig,
+    row_number: int,
+) -> dict[str, Any]:
+    """Build one ``(environment, tools, task, solution, verifier)`` tuple.
+
+    Args:
+        category: Seed task category.
+        context_values: Context copied from the seed row.
+        config: Column configuration.
+        row_number: Zero-based row position used for stable ids.
+
+    Returns:
+        Structured Generalist environment tuple.
+ """ + database = build_database(category, context_values, config.database_size, config.required_tag) + task_iterations = build_task_iterations(category, database, config) + final_iteration = task_iterations[-1] + constraints = final_iteration["constraints"] + answer = final_iteration["reference_answer"] + verified = bool(final_iteration["reference_solution_passed"]) + tool_names = final_iteration["tool_names"] + category_slug = slugify(category, "task") + context_slug = stable_int(json.dumps(context_values, sort_keys=True), 10_000) + environment_id = f"{category_slug}-{row_number + 1:04d}-{context_slug:04d}" + + return { + "schema_version": "generalist-agent-env/v1", + "source_workflow": "DeepSeek-V3.2 Generalist automatic environment synthesis", + "environment": { + "environment_id": environment_id, + "category": category, + "sandbox": { + "base_tools": list(BASE_SANDBOX_TOOLS), + "database_name": f"{environment_id}_db", + "database_schema": dict(DATABASE_SCHEMA), + }, + "database": database, + "database_record_count": len(database), + "source_context": dict(context_values), + "data_acquisition": { + "mode": "synthetic", + "base_sandbox_tools": list(BASE_SANDBOX_TOOLS), + "note": "Records are generated locally from seed data; downstream workflows may replace them with search-retrieved records.", + }, + }, + "tools": build_tool_specs(tool_names), + "tool_module_source": build_tool_module_source(database, tool_names), + "task": { + "difficulty": config.difficulty, + "category": category, + "prompt": build_task_prompt(category, config.difficulty, constraints), + "constraints": constraints, + "answer_schema": { + "record_id": "string or null", + "score": "integer when record_id is not null", + "cost": "integer when record_id is not null", + "tags": "list of strings when record_id is not null", + }, + }, + "solution": { + "language": "python", + "entrypoint": "solve", + "source": final_iteration["solution_source"], + "restrictions": [ + "may call synthesized tool 
functions", + "may perform local logical computation", + "must not directly access the sandbox database", + ], + }, + "verifier": { + "language": "python", + "entrypoint": "verify", + "source": final_iteration["verifier_source"], + "reference_solution_passed": verified, + }, + "reference_answer": answer, + "task_iterations": task_iterations, + "synthesis_trace": build_synthesis_trace(category, config.difficulty, tool_names, constraints, verified), + "rl_filter_note": "Downstream RL retention can keep generated tuples with non-zero pass@100.", + } + + +class GeneralistAgentEnvColumnGenerator(ColumnGeneratorFullColumn[GeneralistAgentEnvColumnConfig]): + """Generate Generalist agent environment tuples for each input row.""" + + def generate(self, data: pd.DataFrame) -> pd.DataFrame: + """Generate structured environment tuples. + + Args: + data: Input DataFrame containing the configured task category and context columns. + + Returns: + The input DataFrame with the configured output column populated. + """ + tuples: list[dict[str, Any]] = [] + for row_number, (_, row) in enumerate(data.iterrows()): + category = normalize_cell(row[self.config.task_category_column]) or "general task" + context_values = { + column: normalize_cell(row[column]) + for column in self.config.context_columns + if normalize_cell(row[column]) + } + tuples.append(build_environment_tuple(category, context_values, self.config, row_number)) + data[self.config.name] = tuples + return data diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py new file mode 100644 index 0000000..c1f5f3c --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from data_designer.plugins.plugin import Plugin, PluginType + +plugin = Plugin( + config_qualified_name="data_designer_generalist_agent_env.config.GeneralistAgentEnvColumnConfig", + impl_qualified_name="data_designer_generalist_agent_env.impl.GeneralistAgentEnvColumnGenerator", + plugin_type=PluginType.COLUMN_GENERATOR, +) diff --git a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py new file mode 100644 index 0000000..6c5a803 --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py @@ -0,0 +1,212 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import Any + +import pandas as pd +import pytest +from data_designer.config.config_builder import DataDesignerConfigBuilder +from data_designer.config.seed_source_dataframe import DataFrameSeedSource +from data_designer.engine.testing.utils import assert_valid_plugin +from data_designer.interface.data_designer import DataDesigner +from pydantic import ValidationError + +from data_designer_generalist_agent_env.config import GeneralistAgentEnvColumnConfig +from data_designer_generalist_agent_env.impl import ( + GeneralistAgentEnvColumnGenerator, + build_environment_tuple, + build_reference_answer, + default_constraints, + selected_tool_names, +) +from data_designer_generalist_agent_env.plugin import plugin + + +def test_valid_plugin() -> None: + assert_valid_plugin(plugin) + + +def make_generator(config: GeneralistAgentEnvColumnConfig) -> GeneralistAgentEnvColumnGenerator: + """Create a generator instance without requiring a ResourceProvider.""" + generator = GeneralistAgentEnvColumnGenerator.__new__(GeneralistAgentEnvColumnGenerator) + generator._config = config + return generator + + +def run_generated_sources(environment_tuple: dict[str, Any]) 
-> tuple[dict[str, Any], bool]: + """Execute generated tool, solution, and verifier source for one tuple.""" + tool_namespace: dict[str, Any] = {} + exec(environment_tuple["tool_module_source"], tool_namespace) + tools = {tool["name"]: tool_namespace[tool["name"]] for tool in environment_tuple["tools"]} + + solution_namespace: dict[str, Any] = {} + exec(environment_tuple["solution"]["source"], solution_namespace) + answer = solution_namespace["solve"](tools) + + verifier_namespace: dict[str, Any] = {} + exec(environment_tuple["verifier"]["source"], verifier_namespace) + verified = verifier_namespace["verify"](answer, environment_tuple["environment"]["database"]) + return answer, verified + + +class TestGeneralistAgentEnvColumnConfig: + def test_required_columns_include_category_and_context(self) -> None: + config = GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + context_columns=["constraints", "persona", "constraints"], + ) + + assert config.required_columns == ["category", "constraints", "persona"] + assert config.side_effect_columns == [] + + def test_column_emoji(self) -> None: + config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") + + assert config.get_column_emoji() == "🧰" + + def test_rejects_empty_category_column(self) -> None: + with pytest.raises(ValidationError, match="task_category_column must not be empty"): + GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column=" ") + + def test_rejects_category_repeated_as_context(self) -> None: + with pytest.raises(ValidationError, match="context_columns must not repeat task_category_column"): + GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + context_columns=["category"], + ) + + def test_normalizes_required_tag(self) -> None: + config = GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + required_tag=" Family ", + ) + + assert 
config.required_tag == "family" + + +class TestGeneralistAgentEnvHelpers: + def test_tool_names_follow_difficulty(self) -> None: + assert selected_tool_names("simple") == ["list_records", "search_records", "get_record"] + assert selected_tool_names("medium") == ["list_records", "search_records", "get_record", "filter_records"] + assert selected_tool_names("hard") == [ + "list_records", + "search_records", + "get_record", + "filter_records", + "rank_records", + ] + + def test_reference_answer_is_verifier_optimal(self) -> None: + config = GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + difficulty="hard", + required_tag="family", + ) + environment_tuple = build_environment_tuple( + "planning a travel itinerary", + {"city": "Seoul", "budget": "1200"}, + config, + row_number=0, + ) + + answer, verified = run_generated_sources(environment_tuple) + + assert verified is True + assert answer == environment_tuple["reference_answer"] + assert environment_tuple["verifier"]["reference_solution_passed"] is True + assert environment_tuple["task"]["constraints"]["required_tag"] == "family" + + def test_constraints_are_repaired_when_user_values_are_unsat(self) -> None: + config = GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + required_tag="rare", + max_cost=1, + min_score=100, + ) + environment_tuple = build_environment_tuple("debugging a build failure", {}, config, row_number=0) + database = environment_tuple["environment"]["database"] + constraints = default_constraints(database, config) + answer = build_reference_answer(database, constraints) + + assert constraints["repair_notes"] + assert answer["record_id"] is not None + + +class TestGeneralistAgentEnvColumnGenerator: + def test_generate_creates_environment_tuple(self) -> None: + source_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["visit museums and stay under a moderate budget"], + } + ) + config = 
GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + context_columns=["constraints"], + ) + generator = make_generator(config) + + result = generator.generate(source_df) + environment_tuple = result.loc[0, "agent_env"] + + assert environment_tuple["schema_version"] == "generalist-agent-env/v1" + assert environment_tuple["environment"]["sandbox"]["base_tools"] == ["bash", "search"] + assert environment_tuple["environment"]["database_record_count"] == config.database_size + assert {tool["name"] for tool in environment_tuple["tools"]} == set(selected_tool_names("hard")) + assert environment_tuple["task"]["difficulty"] == "hard" + assert [iteration["difficulty"] for iteration in environment_tuple["task_iterations"]] == [ + "simple", + "medium", + "hard", + ] + assert all(iteration["reference_solution_passed"] for iteration in environment_tuple["task_iterations"]) + assert environment_tuple["solution"]["restrictions"] == [ + "may call synthesized tool functions", + "may perform local logical computation", + "must not directly access the sandbox database", + ] + + def test_generated_python_sources_pass_verifier(self) -> None: + source_df = pd.DataFrame({"category": ["planning a travel itinerary"]}) + config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") + generator = make_generator(config) + result = generator.generate(source_df) + + answer, verified = run_generated_sources(result.loc[0, "agent_env"]) + + assert verified is True + assert answer["record_id"] + + +class TestGeneralistAgentEnvPreviewIntegration: + def test_preview_generates_environment_tuple(self, tmp_path: Path) -> None: + seed_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["compare candidate plans by score, cost, and family suitability"], + } + ) + + builder = DataDesignerConfigBuilder() + builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) + builder.add_column( + name="agent_env", + 
column_type="generalist-agent-env", + task_category_column="category", + context_columns=["constraints"], + required_tag="family", + ) + + result = DataDesigner(artifact_path=tmp_path / "artifacts").preview(builder, num_records=1) + + assert result.dataset is not None + environment_tuple = result.dataset.loc[0, "agent_env"] + assert environment_tuple["task"]["constraints"]["required_tag"] == "family" + assert environment_tuple["verifier"]["reference_solution_passed"] is True diff --git a/pyproject.toml b/pyproject.toml index cffc13f..34b9263 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ ignore = [ ] [tool.ruff.lint.isort] -known-first-party = ["ddp", "data_designer_template"] +known-first-party = ["ddp", "data_designer_generalist_agent_env", "data_designer_template"] [tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "all" diff --git a/uv.lock b/uv.lock index 781f8c8..90231e2 100644 --- a/uv.lock +++ b/uv.lock @@ -9,6 +9,7 @@ resolution-markers = [ [manifest] members = [ + "data-designer-generalist-agent-env", "data-designer-plugins-workspace", "data-designer-template", "ddp", @@ -353,7 +354,7 @@ wheels = [ [[package]] name = "data-designer" -version = "0.5.7" +version = "0.5.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "data-designer-config" }, @@ -361,14 +362,14 @@ dependencies = [ { name = "prompt-toolkit" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dd/4b/00aeaaf364f1a7efbf5103954196ca351cdecc6d65203e5a7e4e33a69a2b/data_designer-0.5.7.tar.gz", hash = "sha256:374f9d15f7774fb5a79935b9e6ce989b7b5c364a8d1e0ce0e6e792258376b1a3", size = 120078, upload-time = "2026-04-17T22:03:14.088Z" } +sdist = { url = "https://files.pythonhosted.org/packages/21/a5/ee29d0f858e8b36e348fc4b99929384865a31cbce81c4e303bf96d6a39c7/data_designer-0.5.9.tar.gz", hash = "sha256:2b50e075bf4f58532fb22dea5aa777fe5679050b57e540ab7f51de33e8d74299", size = 120115, upload-time = 
"2026-04-28T23:23:04.673Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/93/ddadb9707ba8bde858bcca5cdfcaa016426b9ae9e5ce3bbfbaf3813a281f/data_designer-0.5.7-py3-none-any.whl", hash = "sha256:ec4f162b1c248c8d7fe81a8ca19c246998e0bb557f8dfbe629b8c85ac7e68182", size = 99133, upload-time = "2026-04-17T22:03:12.507Z" }, + { url = "https://files.pythonhosted.org/packages/3f/2b/b6adf669d53d04d2c5c78608974f8e68d32cb22334e1bbbb0c4998ac34db/data_designer-0.5.9-py3-none-any.whl", hash = "sha256:5c583d635fc26e5effe63f96001d1d11cdb8a5e23df96d1855780a7224559b8e", size = 99164, upload-time = "2026-04-28T23:23:03.397Z" }, ] [[package]] name = "data-designer-config" -version = "0.5.7" +version = "0.5.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jinja2" }, @@ -384,14 +385,14 @@ dependencies = [ { name = "requests" }, { name = "rich" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/b6/e1b29e2fc98322f9865f20a0c3baa18972bbe353b65dd52c7f9786f8b9c5/data_designer_config-0.5.7.tar.gz", hash = "sha256:248b28ad2ec446599614e4656bae443ba9a9f3805e14ab478374fb34eb89636d", size = 128660, upload-time = "2026-04-17T22:03:06.42Z" } +sdist = { url = "https://files.pythonhosted.org/packages/72/a4/304374c08ede51262aaeb59848adb6a3cf8b18811847111e6152821ecebd/data_designer_config-0.5.9.tar.gz", hash = "sha256:401cad60ac68f15f05d694c4a053f8ee9001609838f96f6022394884eab10bba", size = 128778, upload-time = "2026-04-28T23:22:57.847Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/65/fc/a85d1d9d7436e4ebccc11b1fc7c0be1287584f4bee3a9ed71d58da0d0b0a/data_designer_config-0.5.7-py3-none-any.whl", hash = "sha256:3d4d22c4d8e4b36189f62ef103122a108add1cfbbacf8afcfdd281e9458bd77d", size = 114479, upload-time = "2026-04-17T22:03:04.993Z" }, + { url = "https://files.pythonhosted.org/packages/a7/ea/e46d62abe4eeb2180bf1c512930501dcee85b606974fb4eebb0be5180c23/data_designer_config-0.5.9-py3-none-any.whl", hash = 
"sha256:13f0b3232db3f565c8eb759b08763bf66ed0da9f712efe451d242cb81ec396ee", size = 114597, upload-time = "2026-04-28T23:22:56.406Z" }, ] [[package]] name = "data-designer-engine" -version = "0.5.7" +version = "0.5.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyascii" }, @@ -417,11 +418,22 @@ dependencies = [ { name = "sqlfluff" }, { name = "tiktoken" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5c/35/a8abd88c44aa603bacff33d6983959b95bdc4c0116fc03460fb4ef04f803/data_designer_engine-0.5.7.tar.gz", hash = "sha256:f1dfeaad52a12fe12bf9796ae45dddb9d1eed82bdb02979d6cdab8c723631651", size = 794680, upload-time = "2026-04-17T22:03:10.25Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/c9/16a5d199fcb30f045cfc83e26a3a69dcf06c6e0387b13f98f43d8ed345e7/data_designer_engine-0.5.9.tar.gz", hash = "sha256:3adf2185404b156fe7ebc03d022eef2398c96f033a31cf96434b1eaa81f51cea", size = 799004, upload-time = "2026-04-28T23:23:01.811Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/35/d4/3844529ae989be9e63b0b8f47c28492793993427dc7d54d6d2a923ad2acc/data_designer_engine-0.5.7-py3-none-any.whl", hash = "sha256:75cd7d5ad0b230ddf75950ba7f97c9ad75c54887ad1247cdf623dc008e31a418", size = 631945, upload-time = "2026-04-17T22:03:08.584Z" }, + { url = "https://files.pythonhosted.org/packages/8d/9a/48816e9e83854c00b26c165a8fbb53701a5b845c8f403d19dad52418fe84/data_designer_engine-0.5.9-py3-none-any.whl", hash = "sha256:09b4db93304d7a1fc1500f562684cc9199e9b9d986fbb18f21609e152390b01a", size = 632446, upload-time = "2026-04-28T23:22:59.734Z" }, ] +[[package]] +name = "data-designer-generalist-agent-env" +version = "0.1.0" +source = { editable = "plugins/data-designer-generalist-agent-env" } +dependencies = [ + { name = "data-designer" }, +] + +[package.metadata] +requires-dist = [{ name = "data-designer", specifier = ">=0.5.9" }] + [[package]] name = "data-designer-plugins-workspace" version = "0.0.0" diff --git 
a/zensical.toml b/zensical.toml index 3f1af80..d2ac622 100644 --- a/zensical.toml +++ b/zensical.toml @@ -19,6 +19,10 @@ nav = [ {"Plugins" = [ {"Overview" = "plugins/index.md"}, # BEGIN GENERATED PLUGIN DOCS NAV + {"data-designer-generalist-agent-env" = [ + {"Overview" = "plugins/data-designer-generalist-agent-env/index.md"}, + {"Usage" = "plugins/data-designer-generalist-agent-env/usage.md"}, + ]}, {"data-designer-template" = [ {"Overview" = "plugins/data-designer-template/index.md"}, {"Usage" = "plugins/data-designer-template/usage.md"}, From ff78c9615a2c2a69745208144ea65609e7434a8e Mon Sep 17 00:00:00 2001 From: "Eric W. Tramel" Date: Wed, 6 May 2026 21:11:00 -0400 Subject: [PATCH 2/4] Add executable row validation helper --- .../index.md | 13 + .../usage.md | 16 +- .../docs/index.md | 13 + .../docs/usage.md | 16 +- .../__init__.py | 16 + .../impl.py | 5 +- .../validation.py | 515 ++++++++++++++++++ .../tests/test_plugin.py | 61 ++- 8 files changed, 608 insertions(+), 47 deletions(-) create mode 100644 plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py diff --git a/docs/plugins/data-designer-generalist-agent-env/index.md b/docs/plugins/data-designer-generalist-agent-env/index.md index 23137c9..1880e4a 100644 --- a/docs/plugins/data-designer-generalist-agent-env/index.md +++ b/docs/plugins/data-designer-generalist-agent-env/index.md @@ -70,6 +70,19 @@ The generated `agent_env` value is a dictionary with these top-level keys: | `task_iterations` | Simple-to-final task, solution, verifier, and augmentation artifacts. | | `synthesis_trace` | Environment construction, task synthesis, tool augmentation, solution, and verification events. | +## Row validation helper + +The package includes a helper module for executable row validation. 
It executes +the generated tool module, smoke-tests the declared tools, runs the generated +solution, checks the generated verifier, and replays every task iteration: + +```python +from data_designer_generalist_agent_env.validation import verify_row_record + +validation = verify_row_record(result.dataset.loc[0], output_column="agent_env") +assert validation.passed, validation.errors +``` + ## Behavior Notes The plugin is deterministic and does not call the Internet. It records `bash` diff --git a/docs/plugins/data-designer-generalist-agent-env/usage.md b/docs/plugins/data-designer-generalist-agent-env/usage.md index f46421d..8ed6215 100644 --- a/docs/plugins/data-designer-generalist-agent-env/usage.md +++ b/docs/plugins/data-designer-generalist-agent-env/usage.md @@ -32,20 +32,14 @@ result = DataDesigner(artifact_path="artifacts").preview(builder, num_records=1) environment_tuple = result.dataset.loc[0, "agent_env"] ``` -The solution can be smoke-tested by executing the generated source: +The generated row can be validated with the package helper: ```python -tool_namespace = {} -exec(environment_tuple["tool_module_source"], tool_namespace) -tools = {tool["name"]: tool_namespace[tool["name"]] for tool in environment_tuple["tools"]} +from data_designer_generalist_agent_env.validation import verify_environment_tuple -solution_namespace = {} -exec(environment_tuple["solution"]["source"], solution_namespace) -answer = solution_namespace["solve"](tools) - -verifier_namespace = {} -exec(environment_tuple["verifier"]["source"], verifier_namespace) -assert verifier_namespace["verify"](answer, environment_tuple["environment"]["database"]) +validation = verify_environment_tuple(environment_tuple) +assert validation.passed, validation.errors +assert validation.answer == environment_tuple["reference_answer"] ``` The output task is intentionally search-like: the solving agent must inspect, diff --git a/plugins/data-designer-generalist-agent-env/docs/index.md 
b/plugins/data-designer-generalist-agent-env/docs/index.md index 23137c9..1880e4a 100644 --- a/plugins/data-designer-generalist-agent-env/docs/index.md +++ b/plugins/data-designer-generalist-agent-env/docs/index.md @@ -70,6 +70,19 @@ The generated `agent_env` value is a dictionary with these top-level keys: | `task_iterations` | Simple-to-final task, solution, verifier, and augmentation artifacts. | | `synthesis_trace` | Environment construction, task synthesis, tool augmentation, solution, and verification events. | +## Row validation helper + +The package includes a helper module for executable row validation. It executes +the generated tool module, smoke-tests the declared tools, runs the generated +solution, checks the generated verifier, and replays every task iteration: + +```python +from data_designer_generalist_agent_env.validation import verify_row_record + +validation = verify_row_record(result.dataset.loc[0], output_column="agent_env") +assert validation.passed, validation.errors +``` + ## Behavior Notes The plugin is deterministic and does not call the Internet. 
It records `bash` diff --git a/plugins/data-designer-generalist-agent-env/docs/usage.md b/plugins/data-designer-generalist-agent-env/docs/usage.md index f46421d..8ed6215 100644 --- a/plugins/data-designer-generalist-agent-env/docs/usage.md +++ b/plugins/data-designer-generalist-agent-env/docs/usage.md @@ -32,20 +32,14 @@ result = DataDesigner(artifact_path="artifacts").preview(builder, num_records=1) environment_tuple = result.dataset.loc[0, "agent_env"] ``` -The solution can be smoke-tested by executing the generated source: +The generated row can be validated with the package helper: ```python -tool_namespace = {} -exec(environment_tuple["tool_module_source"], tool_namespace) -tools = {tool["name"]: tool_namespace[tool["name"]] for tool in environment_tuple["tools"]} +from data_designer_generalist_agent_env.validation import verify_environment_tuple -solution_namespace = {} -exec(environment_tuple["solution"]["source"], solution_namespace) -answer = solution_namespace["solve"](tools) - -verifier_namespace = {} -exec(environment_tuple["verifier"]["source"], verifier_namespace) -assert verifier_namespace["verify"](answer, environment_tuple["environment"]["database"]) +validation = verify_environment_tuple(environment_tuple) +assert validation.passed, validation.errors +assert validation.answer == environment_tuple["reference_answer"] ``` The output task is intentionally search-like: the solving agent must inspect, diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py index 52a7a9d..4c635f4 100644 --- a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/__init__.py @@ -1,2 +1,18 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 + +from data_designer_generalist_agent_env.validation import ( + IterationExecutionCheck, + RowRecordValidationResult, + ToolExecutionCheck, + verify_environment_tuple, + verify_row_record, +) + +__all__ = [ + "IterationExecutionCheck", + "RowRecordValidationResult", + "ToolExecutionCheck", + "verify_environment_tuple", + "verify_row_record", +] diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py index c5dd423..429b3ba 100644 --- a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py @@ -512,6 +512,7 @@ def build_solution_source(constraints: dict[str, Any], difficulty: Difficulty) - lines = [ "def solve(tools):", ' """Solve the task using only synthesized tool functions and local logic."""', + f" required_tag = {required_tag}", ] if difficulty == "simple": @@ -523,7 +524,7 @@ def build_solution_source(constraints: dict[str, Any], difficulty: Difficulty) - " continue", f' if int(record["score"]) < {constraints["min_score"]}:', " continue", - f' if {required_tag} is not None and {required_tag} not in record.get("tags", []):', + ' if required_tag is not None and required_tag not in record.get("tags", []):', " continue", " candidates.append(record)", ] @@ -534,7 +535,7 @@ def build_solution_source(constraints: dict[str, Any], difficulty: Difficulty) - ' candidates = tools["filter_records"](', f" max_cost={constraints['max_cost']},", f" min_score={constraints['min_score']},", - f" required_tag={required_tag},", + " required_tag=required_tag,", " )", ] ) diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py new 
file mode 100644 index 0000000..ef4b34d --- /dev/null +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py @@ -0,0 +1,515 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from collections.abc import Callable, Mapping +from dataclasses import dataclass, field +from typing import Any + + +@dataclass(frozen=True) +class ToolExecutionCheck: + """Execution result for one generated tool. + + Attributes: + name: Tool name from the generated row artifact. + passed: Whether the tool executed and returned the expected output shape. + output_type: Python type name returned by the smoke invocation. + output_size: Length of the output when the output is a sized collection. + error: Error message when execution failed. + """ + + name: str + passed: bool + output_type: str | None = None + output_size: int | None = None + error: str | None = None + + +@dataclass(frozen=True) +class IterationExecutionCheck: + """Execution result for one generated task iteration. + + Attributes: + difficulty: Iteration difficulty label. + passed: Whether the iteration solution was accepted by its verifier. + answer: Answer returned by the iteration solution. + verifier_passed: Raw verifier decision for the generated answer. + error: Error message when execution failed. + """ + + difficulty: str + passed: bool + answer: Any | None = None + verifier_passed: bool = False + error: str | None = None + + +@dataclass(frozen=True) +class RowRecordValidationResult: + """Validation result for a generated Generalist environment row record. + + Attributes: + passed: Whether all executable artifacts passed validation. + answer: Answer returned by the final generated solution. + verifier_passed: Raw verifier decision for the final generated answer. + tools_passed: Whether all generated tools passed smoke execution. 
+ tool_checks: Per-tool execution checks. + iteration_checks: Per-iteration solution and verifier checks. + errors: Validation errors collected across executable artifacts. + """ + + passed: bool + answer: Any | None + verifier_passed: bool + tools_passed: bool + tool_checks: list[ToolExecutionCheck] = field(default_factory=list) + iteration_checks: list[IterationExecutionCheck] = field(default_factory=list) + errors: list[str] = field(default_factory=list) + + +def failed_validation(error: str) -> RowRecordValidationResult: + """Build a failed validation result. + + Args: + error: Error message to attach to the result. + + Returns: + A failed validation result with no executable artifacts. + """ + return RowRecordValidationResult( + passed=False, + answer=None, + verifier_passed=False, + tools_passed=False, + errors=[error], + ) + + +def execute_source_module(source: str, expected_entrypoints: list[str] | None = None) -> dict[str, Any]: + """Execute generated Python source and return its namespace. + + Args: + source: Python module source emitted by the plugin. + expected_entrypoints: Callable names that must exist after execution. + + Returns: + The execution namespace. + + Raises: + ValueError: If an expected entrypoint is missing or is not callable. + Exception: Any exception raised by the generated source during execution. + """ + namespace: dict[str, Any] = {} + exec(source, namespace) + for entrypoint in expected_entrypoints or []: + candidate = namespace.get(entrypoint) + if not callable(candidate): + msg = f"expected callable {entrypoint!r} in generated source" + raise ValueError(msg) + return namespace + + +def extract_environment_tuple(row_record: Mapping[str, Any], output_column: str | None = None) -> Mapping[str, Any]: + """Extract the generated environment tuple from a row-like record. + + Args: + row_record: Either the generated environment tuple itself or a row mapping + that contains the tuple in ``output_column``. 
+ output_column: Optional output column containing the generated tuple. + + Returns: + The generated environment tuple mapping. + + Raises: + KeyError: If ``output_column`` is supplied but absent. + TypeError: If the extracted value is not mapping-like. + ValueError: If ``row_record`` is not already an environment tuple and no + ``output_column`` is supplied. + """ + if output_column is None: + if "schema_version" in row_record and "environment" in row_record and "tools" in row_record: + return row_record + msg = "row_record must be an environment tuple unless output_column is provided" + raise ValueError(msg) + + if output_column not in row_record: + msg = f"row_record does not contain output column {output_column!r}" + raise KeyError(msg) + + environment_tuple = row_record[output_column] + if not isinstance(environment_tuple, Mapping): + msg = f"row_record[{output_column!r}] must be a mapping" + raise TypeError(msg) + return environment_tuple + + +def output_size(output: Any) -> int | None: + """Return a compact size for common collection outputs. + + Args: + output: Tool output. + + Returns: + The output length for common collections, otherwise ``None``. + """ + if isinstance(output, (dict, list, set, tuple)): + return len(output) + return None + + +def tool_output_error(tool_name: str, output: Any) -> str | None: + """Validate the expected output shape for a generated tool. + + Args: + tool_name: Tool name. + output: Value returned by the tool smoke invocation. + + Returns: + An error message when the output shape is unexpected, otherwise ``None``. 
+ """ + if tool_name in {"list_records", "search_records", "filter_records", "rank_records"}: + if not isinstance(output, list): + return f"{tool_name} returned {type(output).__name__}; expected list" + if tool_name == "get_record" and output is not None and not isinstance(output, dict): + return f"get_record returned {type(output).__name__}; expected dict or None" + return None + + +def invoke_tool_for_smoke_check( + tool_name: str, + tool: Callable[..., Any], + database: list[dict[str, Any]], + constraints: Mapping[str, Any], +) -> Any: + """Invoke one generated tool with a row-local smoke-test call. + + Args: + tool_name: Tool name. + tool: Callable loaded from the generated tool module. + database: Row-local sandbox database. + constraints: Final task constraints. + + Returns: + The tool output. + """ + if tool_name == "list_records": + return tool() + if tool_name == "search_records": + return tool("", max_results=2) + if tool_name == "get_record": + record_id = database[0].get("record_id") if database else "__missing__" + return tool(record_id) + if tool_name == "filter_records": + return tool( + max_cost=constraints.get("max_cost"), + min_score=constraints.get("min_score"), + required_tag=constraints.get("required_tag"), + ) + if tool_name == "rank_records": + return tool(list(database), metric="score", descending=True) + return tool() + + +def run_tool_execution_check( + tool_name: str, + tool: Callable[..., Any], + database: list[dict[str, Any]], + constraints: Mapping[str, Any], +) -> ToolExecutionCheck: + """Execute one generated tool and validate its output shape. + + Args: + tool_name: Tool name. + tool: Callable loaded from the generated tool module. + database: Row-local sandbox database. + constraints: Final task constraints. + + Returns: + A structured per-tool execution result. 
+ """ + try: + output = invoke_tool_for_smoke_check(tool_name, tool, database, constraints) + except Exception as exc: # noqa: BLE001 + return ToolExecutionCheck(name=tool_name, passed=False, error=str(exc)) + + error = tool_output_error(tool_name, output) + return ToolExecutionCheck( + name=tool_name, + passed=error is None, + output_type=type(output).__name__, + output_size=output_size(output), + error=error, + ) + + +def build_tools_from_namespace( + tool_names: list[str], + namespace: Mapping[str, Any], +) -> tuple[dict[str, Callable[..., Any]], list[str]]: + """Build the generated tool mapping from an executed namespace. + + Args: + tool_names: Names requested by the row artifact. + namespace: Namespace returned by ``execute_source_module``. + + Returns: + A tuple of callable tools and validation errors. + """ + tools: dict[str, Callable[..., Any]] = {} + errors: list[str] = [] + for tool_name in tool_names: + candidate = namespace.get(tool_name) + if not callable(candidate): + errors.append(f"generated tool {tool_name!r} is missing or not callable") + continue + tools[tool_name] = candidate + return tools, errors + + +def tool_names_from_specs(tool_specs: Any) -> tuple[list[str], list[str]]: + """Extract tool names from generated tool specs. + + Args: + tool_specs: Value from the environment tuple ``tools`` field. + + Returns: + A tuple of tool names and validation errors. 
+ """ + if not isinstance(tool_specs, list): + return [], ["environment tuple tools field must be a list"] + + tool_names: list[str] = [] + errors: list[str] = [] + for index, tool_spec in enumerate(tool_specs): + if not isinstance(tool_spec, Mapping): + errors.append(f"tools[{index}] must be a mapping") + continue + tool_name = tool_spec.get("name") + if not isinstance(tool_name, str) or not tool_name: + errors.append(f"tools[{index}].name must be a non-empty string") + continue + tool_names.append(tool_name) + return tool_names, errors + + +def run_solution_and_verifier( + solution_source: str, + solution_entrypoint: str, + verifier_source: str, + verifier_entrypoint: str, + tools: Mapping[str, Callable[..., Any]], + database: list[dict[str, Any]], +) -> tuple[Any | None, bool, list[str]]: + """Execute generated solution source and validate it with generated verifier source. + + Args: + solution_source: Python source defining the solution function. + solution_entrypoint: Name of the solution function. + verifier_source: Python source defining the verifier function. + verifier_entrypoint: Name of the verifier function. + tools: Callable tool mapping exposed to the solution. + database: Row-local sandbox database exposed to the verifier. + + Returns: + The solution answer, verifier decision, and collected execution errors. 
+ """ + errors: list[str] = [] + try: + solution_namespace = execute_source_module(solution_source, [solution_entrypoint]) + answer = solution_namespace[solution_entrypoint](dict(tools)) + except Exception as exc: # noqa: BLE001 + return None, False, [f"solution execution failed: {exc}"] + + try: + verifier_namespace = execute_source_module(verifier_source, [verifier_entrypoint]) + verifier_passed = bool(verifier_namespace[verifier_entrypoint](answer, database)) + except Exception as exc: # noqa: BLE001 + return answer, False, [f"verifier execution failed: {exc}"] + + if not verifier_passed: + errors.append("verifier rejected generated solution answer") + return answer, verifier_passed, errors + + +def run_iteration_execution_check( + iteration: Mapping[str, Any], + tool_namespace: Mapping[str, Any], + database: list[dict[str, Any]], +) -> IterationExecutionCheck: + """Execute and verify one generated task iteration. + + Args: + iteration: Task iteration artifact from ``task_iterations``. + tool_namespace: Executed namespace from ``tool_module_source``. + database: Row-local sandbox database. + + Returns: + A structured per-iteration execution result. 
+ """ + difficulty = str(iteration.get("difficulty", "unknown")) + tool_names = iteration.get("tool_names", []) + if not isinstance(tool_names, list) or not all(isinstance(tool_name, str) for tool_name in tool_names): + return IterationExecutionCheck( + difficulty=difficulty, + passed=False, + error="iteration tool_names must be a list of strings", + ) + + tools, tool_errors = build_tools_from_namespace(tool_names, tool_namespace) + if tool_errors: + return IterationExecutionCheck(difficulty=difficulty, passed=False, error="; ".join(tool_errors)) + + answer, verifier_passed, errors = run_solution_and_verifier( + str(iteration.get("solution_source", "")), + "solve", + str(iteration.get("verifier_source", "")), + "verify", + tools, + database, + ) + reference_answer = iteration.get("reference_answer") + if reference_answer is not None and answer != reference_answer: + errors.append("iteration answer does not match reference_answer") + + expected_passed = iteration.get("reference_solution_passed") + if expected_passed is not None and bool(expected_passed) != verifier_passed: + errors.append("iteration reference_solution_passed does not match verifier result") + + return IterationExecutionCheck( + difficulty=difficulty, + passed=not errors and verifier_passed, + answer=answer, + verifier_passed=verifier_passed, + error="; ".join(errors) if errors else None, + ) + + +def verify_environment_tuple(environment_tuple: Mapping[str, Any]) -> RowRecordValidationResult: + """Verify one generated environment tuple by executing all generated artifacts. + + The helper executes the generated tool module, smoke-tests every declared + tool, runs the final generated ``solve(tools)`` function, and checks the + result with the generated ``verify(answer, database)`` function. It also + replays every artifact in ``task_iterations`` when present. + + Args: + environment_tuple: Generated ``generalist-agent-env`` output value. 
+ + Returns: + A structured validation result with per-artifact status and errors. + """ + errors: list[str] = [] + try: + environment = environment_tuple["environment"] + database = environment["database"] + task = environment_tuple["task"] + constraints = task["constraints"] + except KeyError as exc: + return failed_validation(f"environment tuple is missing required key: {exc}") + + if not isinstance(database, list): + return failed_validation("environment.database must be a list") + if not all(isinstance(record, dict) for record in database): + return failed_validation("environment.database must contain dict records") + if not isinstance(constraints, Mapping): + return failed_validation("task.constraints must be a mapping") + + tool_names, tool_spec_errors = tool_names_from_specs(environment_tuple.get("tools")) + errors.extend(tool_spec_errors) + + try: + tool_namespace = execute_source_module(str(environment_tuple["tool_module_source"]), tool_names) + except Exception as exc: # noqa: BLE001 + return RowRecordValidationResult( + passed=False, + answer=None, + verifier_passed=False, + tools_passed=False, + errors=[*errors, f"tool_module_source execution failed: {exc}"], + ) + + tools, tool_errors = build_tools_from_namespace(tool_names, tool_namespace) + errors.extend(tool_errors) + tool_checks = [ + run_tool_execution_check(tool_name, tool, database, constraints) for tool_name, tool in tools.items() + ] + errors.extend( + f"tool {check.name!r} failed smoke execution: {check.error}" for check in tool_checks if not check.passed + ) + tools_passed = not tool_spec_errors and not tool_errors and all(check.passed for check in tool_checks) + + solution = environment_tuple.get("solution", {}) + verifier = environment_tuple.get("verifier", {}) + if not isinstance(solution, Mapping) or not isinstance(verifier, Mapping): + return RowRecordValidationResult( + passed=False, + answer=None, + verifier_passed=False, + tools_passed=tools_passed, + tool_checks=tool_checks, + 
errors=[*errors, "solution and verifier fields must be mappings"], + ) + + answer, verifier_passed, source_errors = run_solution_and_verifier( + str(solution.get("source", "")), + str(solution.get("entrypoint", "solve")), + str(verifier.get("source", "")), + str(verifier.get("entrypoint", "verify")), + tools, + database, + ) + errors.extend(source_errors) + + reference_answer = environment_tuple.get("reference_answer") + if reference_answer is not None and answer != reference_answer: + errors.append("final solution answer does not match reference_answer") + + expected_passed = verifier.get("reference_solution_passed") + if expected_passed is not None and bool(expected_passed) != verifier_passed: + errors.append("verifier.reference_solution_passed does not match verifier result") + + iteration_checks: list[IterationExecutionCheck] = [] + task_iterations = environment_tuple.get("task_iterations", []) + if isinstance(task_iterations, list): + iteration_checks = [ + run_iteration_execution_check(iteration, tool_namespace, database) + for iteration in task_iterations + if isinstance(iteration, Mapping) + ] + errors.extend( + f"iteration {check.difficulty!r} failed execution: {check.error}" + for check in iteration_checks + if not check.passed + ) + elif task_iterations is not None: + errors.append("task_iterations must be a list when present") + + passed = not errors and tools_passed and verifier_passed and all(check.passed for check in iteration_checks) + return RowRecordValidationResult( + passed=passed, + answer=answer, + verifier_passed=verifier_passed, + tools_passed=tools_passed, + tool_checks=tool_checks, + iteration_checks=iteration_checks, + errors=errors, + ) + + +def verify_row_record(row_record: Mapping[str, Any], output_column: str | None = None) -> RowRecordValidationResult: + """Verify a Data Designer row record containing a generated environment tuple. + + Args: + row_record: Row mapping or generated environment tuple. 
+ output_column: Optional column name containing the generated tuple. + + Returns: + A structured validation result. + """ + try: + environment_tuple = extract_environment_tuple(row_record, output_column) + except (KeyError, TypeError, ValueError) as exc: + return failed_validation(str(exc)) + return verify_environment_tuple(environment_tuple) diff --git a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py index 6c5a803..94a787a 100644 --- a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py +++ b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py @@ -1,8 +1,8 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +from copy import deepcopy from pathlib import Path -from typing import Any import pandas as pd import pytest @@ -21,6 +21,7 @@ selected_tool_names, ) from data_designer_generalist_agent_env.plugin import plugin +from data_designer_generalist_agent_env.validation import verify_environment_tuple, verify_row_record def test_valid_plugin() -> None: @@ -34,22 +35,6 @@ def make_generator(config: GeneralistAgentEnvColumnConfig) -> GeneralistAgentEnv return generator -def run_generated_sources(environment_tuple: dict[str, Any]) -> tuple[dict[str, Any], bool]: - """Execute generated tool, solution, and verifier source for one tuple.""" - tool_namespace: dict[str, Any] = {} - exec(environment_tuple["tool_module_source"], tool_namespace) - tools = {tool["name"]: tool_namespace[tool["name"]] for tool in environment_tuple["tools"]} - - solution_namespace: dict[str, Any] = {} - exec(environment_tuple["solution"]["source"], solution_namespace) - answer = solution_namespace["solve"](tools) - - verifier_namespace: dict[str, Any] = {} - exec(environment_tuple["verifier"]["source"], verifier_namespace) - verified = verifier_namespace["verify"](answer, 
environment_tuple["environment"]["database"]) - return answer, verified - - class TestGeneralistAgentEnvColumnConfig: def test_required_columns_include_category_and_context(self) -> None: config = GeneralistAgentEnvColumnConfig( @@ -114,10 +99,12 @@ def test_reference_answer_is_verifier_optimal(self) -> None: row_number=0, ) - answer, verified = run_generated_sources(environment_tuple) + validation = verify_environment_tuple(environment_tuple) - assert verified is True - assert answer == environment_tuple["reference_answer"] + assert validation.passed is True + assert validation.verifier_passed is True + assert validation.tools_passed is True + assert validation.answer == environment_tuple["reference_answer"] assert environment_tuple["verifier"]["reference_solution_passed"] is True assert environment_tuple["task"]["constraints"]["required_tag"] == "family" @@ -179,10 +166,38 @@ def test_generated_python_sources_pass_verifier(self) -> None: generator = make_generator(config) result = generator.generate(source_df) - answer, verified = run_generated_sources(result.loc[0, "agent_env"]) + validation = verify_environment_tuple(result.loc[0, "agent_env"]) + + assert validation.passed is True + assert validation.answer["record_id"] + assert {check.name for check in validation.tool_checks} == set(selected_tool_names("hard")) + assert [check.difficulty for check in validation.iteration_checks] == ["simple", "medium", "hard"] + + def test_row_record_validation_reads_named_output_column(self) -> None: + source_df = pd.DataFrame({"category": ["planning a travel itinerary"]}) + config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") + generator = make_generator(config) + result = generator.generate(source_df) + + validation = verify_row_record(result.loc[0], output_column="agent_env") + + assert validation.passed is True + assert validation.verifier_passed is True + + def test_row_record_validation_reports_missing_tool_implementation(self) -> 
None: + config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") + environment_tuple = build_environment_tuple("planning a travel itinerary", {}, config, row_number=0) + broken_tuple = deepcopy(environment_tuple) + broken_tuple["tool_module_source"] = broken_tuple["tool_module_source"].replace( + "def rank_records(", + "def missing_rank_records(", + ) + + validation = verify_environment_tuple(broken_tuple) - assert verified is True - assert answer["record_id"] + assert validation.passed is False + assert validation.tools_passed is False + assert any("rank_records" in error for error in validation.errors) class TestGeneralistAgentEnvPreviewIntegration: From 5b61b373f14a8770066bbcea161931202081684b Mon Sep 17 00:00:00 2001 From: "Eric W. Tramel" Date: Wed, 6 May 2026 21:25:56 -0400 Subject: [PATCH 3/4] Handle saved row validation artifacts --- .../validation.py | 108 +++++++++++++++--- .../tests/test_plugin.py | 37 +++++- 2 files changed, 129 insertions(+), 16 deletions(-) diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py index ef4b34d..a14e213 100644 --- a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py @@ -159,6 +159,85 @@ def output_size(output: Any) -> int | None: return None +def coerce_list_like(value: Any) -> list[Any] | None: + """Coerce common list-like values into a Python list. + + Args: + value: Candidate list-like value. This includes values restored from + nested Parquet structures, such as NumPy arrays, without importing + optional array libraries directly. + + Returns: + A Python list when coercion is possible, otherwise ``None``. 
+ """ + if isinstance(value, list): + return value + if isinstance(value, tuple): + return list(value) + tolist = getattr(value, "tolist", None) + if callable(tolist): + converted = tolist() + if isinstance(converted, list): + return converted + return None + + +def to_plain_data(value: Any) -> Any: + """Convert nested array-like values into JSON-style Python containers. + + Args: + value: Arbitrary generated artifact value. + + Returns: + The value with mappings and list-like values recursively normalized. + """ + if isinstance(value, Mapping): + return {key: to_plain_data(nested_value) for key, nested_value in value.items()} + + values = coerce_list_like(value) + if values is not None: + return [to_plain_data(nested_value) for nested_value in values] + + item = getattr(value, "item", None) + if callable(item) and not isinstance(value, (str, bytes)): + try: + return item() + except (TypeError, ValueError): + return value + return value + + +def normalize_database(database: Any) -> tuple[list[dict[str, Any]] | None, str | None]: + """Normalize an environment database for execution. + + Args: + database: Database value from a generated row. In-memory Data Designer + rows use lists, while saved Parquet artifacts may restore nested + lists as array-like values. + + Returns: + A tuple of normalized database records and an error message. Exactly one + element is non-``None``. 
+ """ + records = coerce_list_like(database) + if records is None: + return None, "environment.database must be list-like" + + normalized_records: list[dict[str, Any]] = [] + for index, record in enumerate(records): + if not isinstance(record, Mapping): + return None, f"environment.database[{index}] must be a mapping" + normalized_record = dict(record) + tags = normalized_record.get("tags") + if tags is not None and not isinstance(tags, list): + normalized_tags = coerce_list_like(tags) + if normalized_tags is None: + return None, f"environment.database[{index}].tags must be list-like" + normalized_record["tags"] = normalized_tags + normalized_records.append(normalized_record) + return normalized_records, None + + def tool_output_error(tool_name: str, output: Any) -> str | None: """Validate the expected output shape for a generated tool. @@ -277,8 +356,9 @@ def tool_names_from_specs(tool_specs: Any) -> tuple[list[str], list[str]]: Returns: A tuple of tool names and validation errors. """ - if not isinstance(tool_specs, list): - return [], ["environment tuple tools field must be a list"] + tool_specs = coerce_list_like(tool_specs) + if tool_specs is None: + return [], ["environment tuple tools field must be list-like"] tool_names: list[str] = [] errors: list[str] = [] @@ -349,12 +429,12 @@ def run_iteration_execution_check( A structured per-iteration execution result. 
""" difficulty = str(iteration.get("difficulty", "unknown")) - tool_names = iteration.get("tool_names", []) - if not isinstance(tool_names, list) or not all(isinstance(tool_name, str) for tool_name in tool_names): + tool_names = coerce_list_like(iteration.get("tool_names", [])) + if tool_names is None or not all(isinstance(tool_name, str) for tool_name in tool_names): return IterationExecutionCheck( difficulty=difficulty, passed=False, - error="iteration tool_names must be a list of strings", + error="iteration tool_names must be a list-like value of strings", ) tools, tool_errors = build_tools_from_namespace(tool_names, tool_namespace) @@ -370,7 +450,7 @@ def run_iteration_execution_check( database, ) reference_answer = iteration.get("reference_answer") - if reference_answer is not None and answer != reference_answer: + if reference_answer is not None and to_plain_data(answer) != to_plain_data(reference_answer): errors.append("iteration answer does not match reference_answer") expected_passed = iteration.get("reference_solution_passed") @@ -409,10 +489,9 @@ def verify_environment_tuple(environment_tuple: Mapping[str, Any]) -> RowRecordV except KeyError as exc: return failed_validation(f"environment tuple is missing required key: {exc}") - if not isinstance(database, list): - return failed_validation("environment.database must be a list") - if not all(isinstance(record, dict) for record in database): - return failed_validation("environment.database must contain dict records") + database, database_error = normalize_database(database) + if database_error is not None or database is None: + return failed_validation(database_error or "environment.database could not be normalized") if not isinstance(constraints, Mapping): return failed_validation("task.constraints must be a mapping") @@ -463,7 +542,7 @@ def verify_environment_tuple(environment_tuple: Mapping[str, Any]) -> RowRecordV errors.extend(source_errors) reference_answer = environment_tuple.get("reference_answer") 
- if reference_answer is not None and answer != reference_answer: + if reference_answer is not None and to_plain_data(answer) != to_plain_data(reference_answer): errors.append("final solution answer does not match reference_answer") expected_passed = verifier.get("reference_solution_passed") @@ -472,7 +551,8 @@ def verify_environment_tuple(environment_tuple: Mapping[str, Any]) -> RowRecordV iteration_checks: list[IterationExecutionCheck] = [] task_iterations = environment_tuple.get("task_iterations", []) - if isinstance(task_iterations, list): + task_iterations = coerce_list_like(task_iterations) + if task_iterations is not None: iteration_checks = [ run_iteration_execution_check(iteration, tool_namespace, database) for iteration in task_iterations @@ -483,8 +563,8 @@ def verify_environment_tuple(environment_tuple: Mapping[str, Any]) -> RowRecordV for check in iteration_checks if not check.passed ) - elif task_iterations is not None: - errors.append("task_iterations must be a list when present") + else: + errors.append("task_iterations must be list-like when present") passed = not errors and tools_passed and verifier_passed and all(check.passed for check in iteration_checks) return RowRecordValidationResult( diff --git a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py index 94a787a..4ad4c81 100644 --- a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py +++ b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py @@ -161,8 +161,17 @@ def test_generate_creates_environment_tuple(self) -> None: ] def test_generated_python_sources_pass_verifier(self) -> None: - source_df = pd.DataFrame({"category": ["planning a travel itinerary"]}) - config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") + source_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["compare candidate plans by score, cost, and 
family suitability"], + } + ) + config = GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + context_columns=["constraints"], + ) generator = make_generator(config) result = generator.generate(source_df) @@ -199,6 +208,30 @@ def test_row_record_validation_reports_missing_tool_implementation(self) -> None assert validation.tools_passed is False assert any("rank_records" in error for error in validation.errors) + def test_row_record_validation_accepts_parquet_restored_arrays(self, tmp_path: Path) -> None: + source_df = pd.DataFrame( + { + "category": ["planning a travel itinerary"], + "constraints": ["compare candidate plans by score, cost, and family suitability"], + } + ) + config = GeneralistAgentEnvColumnConfig( + name="agent_env", + task_category_column="category", + context_columns=["constraints"], + ) + generator = make_generator(config) + result = generator.generate(source_df) + environment_tuple = result.loc[0, "agent_env"] + dataset_path = tmp_path / "dataset.parquet" + pd.DataFrame({"agent_env": [environment_tuple]}).to_parquet(dataset_path) + restored = pd.read_parquet(dataset_path) + + validation = verify_row_record(restored.loc[0], output_column="agent_env") + + assert validation.passed is True + assert validation.answer == environment_tuple["reference_answer"] + class TestGeneralistAgentEnvPreviewIntegration: def test_preview_generates_environment_tuple(self, tmp_path: Path) -> None: From 3d356913172576393ca56ca3e45b3bfc0e84b5bb Mon Sep 17 00:00:00 2001 From: "Eric W. 
Tramel" Date: Wed, 6 May 2026 22:04:14 -0400 Subject: [PATCH 4/4] Use generated data for Generalist environments --- catalog/plugins.json | 33 +- .../index.md | 173 ++++- .../usage.md | 153 +++- docs/plugins/index.md | 4 +- .../README.md | 31 +- .../docs/index.md | 173 ++++- .../docs/usage.md | 153 +++- .../pyproject.toml | 5 +- .../config.py | 254 +++--- .../impl.py | 729 +++++++++++++----- .../plugin.py | 12 +- .../validation.py | 6 +- .../tests/test_plugin.py | 377 ++++++--- 13 files changed, 1584 insertions(+), 519 deletions(-) diff --git a/catalog/plugins.json b/catalog/plugins.json index ead8446..aa3f9e5 100644 --- a/catalog/plugins.json +++ b/catalog/plugins.json @@ -2,9 +2,9 @@ "schema_version": 1, "plugins": [ { - "name": "generalist-agent-env", + "name": "generalist-agent-environment", "plugin_type": "column-generator", - "description": "Generalist agent environment tuple generator for Data Designer", + "description": "Generalist agent environment and task assemblers for generated Data Designer data", "package": { "name": "data-designer-generalist-agent-env", "version": "0.1.0", @@ -12,8 +12,33 @@ }, "entry_point": { "group": "data_designer.plugins", - "name": "generalist-agent-env", - "value": "data_designer_generalist_agent_env.plugin:plugin" + "name": "generalist-agent-environment", + "value": "data_designer_generalist_agent_env.plugin:environment_plugin" + }, + "compatibility": { + "python": { + "specifier": ">=3.10" + }, + "data_designer": { + "requirement": "data-designer>=0.5.9", + "specifier": ">=0.5.9", + "marker": null + } + } + }, + { + "name": "generalist-agent-task", + "plugin_type": "column-generator", + "description": "Generalist agent environment and task assemblers for generated Data Designer data", + "package": { + "name": "data-designer-generalist-agent-env", + "version": "0.1.0", + "path": "plugins/data-designer-generalist-agent-env" + }, + "entry_point": { + "group": "data_designer.plugins", + "name": "generalist-agent-task", + 
"value": "data_designer_generalist_agent_env.plugin:task_plugin" }, "compatibility": { "python": { diff --git a/docs/plugins/data-designer-generalist-agent-env/index.md b/docs/plugins/data-designer-generalist-agent-env/index.md index 1880e4a..84d217c 100644 --- a/docs/plugins/data-designer-generalist-agent-env/index.md +++ b/docs/plugins/data-designer-generalist-agent-env/index.md @@ -1,13 +1,24 @@ # data-designer-generalist-agent-env -The `data-designer-generalist-agent-env` plugin adds a `generalist-agent-env` -column type for creating Generalist-style agent environment tuples inspired by -the DeepSeek-V3.2 automatic environment synthesis workflow. - -For each seed row, the plugin builds a row-local sandbox database, exposes -task-specific tool functions, synthesizes a constrained task, emits a solution -function that only calls tools and performs local logic, and emits a verifier -function that checks candidate answers against the database. +The `data-designer-generalist-agent-env` plugin adds a two-stage Generalist +environment workflow for Data Designer. It is designed for workflows where Data +Designer generates the topic, constraints, database schema, and database rows, +then the plugin assembles those generated artifacts into executable RL rollout +tuples. + +The workflow is: + +1. Use ordinary Data Designer columns, such as `llm-text` and `llm-structured`, + to generate a task topic and constraints. +2. Use additional Data Designer generation columns to generate a row-local + database schema and records that follow that schema. +3. Use `generalist-agent-environment` to validate and assemble the generated + schema and records into a sandbox with executable tools. +4. Use `generalist-agent-task` to synthesize the task prompt, tool-only solution, + verifier, reference answer, and simple-to-hard augmentation trace. + +No search provider or external retrieval step is required, and the plugin does +not fabricate fallback records. 
## Installation @@ -15,18 +26,30 @@ function that checks candidate answers against the database. uv add "data-designer>=0.5.9" data-designer-generalist-agent-env ``` -## Column type +## Column types -Use the `generalist-agent-env` column type when a dataset needs structured -`` records for agent training or evaluation. +Use `generalist-agent-environment` to assemble the generated sandbox and toolset. | Field | Required | Description | | --- | --- | --- | -| `name` | Yes | Output column name. | -| `task_category_column` | Yes | Existing column containing a task category such as `planning a travel itinerary`. | -| `context_columns` | No | Existing columns copied into the synthesized sandbox database context. | +| `name` | Yes | Output environment column name. | +| `task_topic_column` | Yes | Existing column containing a generated task topic such as `trip planning`. | +| `task_constraints_column` | No | Existing column containing generated constraints as text, JSON, or a structured object. | +| `database_schema_column` | Yes | Existing column containing the generated database schema. | +| `database_records_column` | Yes | Existing column containing generated database records. | +| `context_columns` | No | Existing columns copied into environment context. | + +Generated records must include `record_id`, `name`, `summary`, `cost`, +`duration`, `score`, and `tags`. Additional fields are preserved, and an +`attributes` object is recommended for topic-specific fields. + +Use `generalist-agent-task` to generate tasks from an environment. + +| Field | Required | Description | +| --- | --- | --- | +| `name` | Yes | Output task tuple column name. | +| `environment_column` | Yes | Column containing a `generalist-agent-environment` artifact. | | `difficulty` | No | Final task difficulty: `simple`, `medium`, or `hard`; defaults to `hard`. | -| `database_size` | No | Number of sandbox records to synthesize per row; defaults to `8`. 
| | `required_tag` | No | Optional tag that the valid answer must contain. | | `max_cost` | No | Optional maximum cost constraint. Unsatisfiable values are repaired upward. | | `min_score` | No | Optional minimum score constraint. Unsatisfiable values are repaired downward. | @@ -40,35 +63,121 @@ from data_designer.config.seed_source_dataframe import DataFrameSeedSource seed_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], - "constraints": ["compare candidate plans by score, cost, and family suitability"], + "seed": ["travel planning"], + "brief": ["family-friendly museums, moderate budget, reliable transport"], } ) +constraint_schema = { + "type": "object", + "properties": { + "goal": {"type": "string"}, + "constraints": {"type": "array", "items": {"type": "string"}}, + "success_criteria": {"type": "array", "items": {"type": "string"}}, + "data_dimensions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["goal", "constraints", "success_criteria", "data_dimensions"], +} + +database_schema_format = { + "type": "object", + "properties": { + "record_type": {"type": "string"}, + "primary_key": {"type": "string", "const": "record_id"}, + "fields": {"type": "array", "items": {"type": "object"}}, + "attribute_fields": {"type": "array", "items": {"type": "object"}}, + }, + "required": ["record_type", "primary_key", "fields", "attribute_fields"], +} + +records_format = { + "type": "object", + "properties": { + "records": { + "type": "array", + "items": { + "type": "object", + "properties": { + "record_id": {"type": "string"}, + "name": {"type": "string"}, + "summary": {"type": "string"}, + "cost": {"type": "integer"}, + "duration": {"type": "integer"}, + "score": {"type": "integer"}, + "tags": {"type": "array", "items": {"type": "string"}}, + "attributes": {"type": "object"}, + }, + "required": ["record_id", "name", "summary", "cost", "duration", "score", "tags"], + }, + } + }, + "required": ["records"], +} + builder = 
DataDesignerConfigBuilder() builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) builder.add_column( - name="agent_env", - column_type="generalist-agent-env", - task_category_column="category", - context_columns=["constraints"], - required_tag="family", + name="task_topic", + column_type="llm-text", + model_alias="deepseek-v4-pro-live", + prompt="From {{ seed }} and {{ brief }}, write a concise task topic.", +) +builder.add_column( + name="task_constraints", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt="Generate constraints for topic {{ task_topic }} with brief {{ brief }}.", + output_format=constraint_schema, +) +builder.add_column( + name="database_schema", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt="Generate a database schema for topic {{ task_topic }} and constraints {{ task_constraints }}.", + output_format=database_schema_format, +) +builder.add_column( + name="database_records", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "Generate 8 records that follow schema {{ database_schema }} for topic " + "{{ task_topic }} and constraints {{ task_constraints }}. Include varied " + "cost, duration, score, tags, and attributes." 
+ ), + output_format=records_format, +) +builder.add_column( + name="agent_environment", + column_type="generalist-agent-environment", + task_topic_column="task_topic", + task_constraints_column="task_constraints", + database_schema_column="database_schema", + database_records_column="database_records", + context_columns=["brief"], +) +builder.add_column( + name="agent_task", + column_type="generalist-agent-task", + environment_column="agent_environment", + difficulty="hard", + required_tag="reliable", ) ``` -The generated `agent_env` value is a dictionary with these top-level keys: +The generated `agent_task` value is a dictionary with these top-level keys: | Key | Description | | --- | --- | -| `environment` | Sandbox metadata, row-local database, schema, and source context. | +| `environment` | Sandbox metadata, generated database schema, generated records, and source context. | | `tools` | Synthesized tool descriptors and Python function sources. | -| `tool_module_source` | Executable Python source defining the hidden database and selected tools. | +| `tool_module_source` | Executable Python source defining the generated schema, generated database, and selected tools. | | `task` | Prompt, difficulty, constraints, and answer schema. | | `solution` | Python `solve(tools)` source restricted to tool calls and local logic. | | `verifier` | Python `verify(answer, database)` source and reference validation status. | | `reference_answer` | The generated solution output that the verifier accepts. | | `task_iterations` | Simple-to-final task, solution, verifier, and augmentation artifacts. | -| `synthesis_trace` | Environment construction, task synthesis, tool augmentation, solution, and verification events. | +| `synthesis_trace` | Topic/constraint intake, schema intake, generated-data intake, task synthesis, solution, and verification events. 
| ## Row validation helper @@ -79,13 +188,13 @@ solution, checks the generated verifier, and replays every task iteration: ```python from data_designer_generalist_agent_env.validation import verify_row_record -validation = verify_row_record(result.dataset.loc[0], output_column="agent_env") +validation = verify_row_record(result.dataset.loc[0], output_column="agent_task") assert validation.passed, validation.errors ``` -## Behavior Notes +## Behavior notes -The plugin is deterministic and does not call the Internet. It records `bash` -and `search` as base sandbox tools and uses the seed row to synthesize the -sandbox database locally. Downstream workflows can replace or augment that -database with retrieved records before using the generated task and verifier. +The plugin does not generate the grounding records. It requires generated +schema and generated records from upstream Data Designer columns, validates the +minimum executable contract, and then builds tools and verifiers around that +generated data. diff --git a/docs/plugins/data-designer-generalist-agent-env/usage.md b/docs/plugins/data-designer-generalist-agent-env/usage.md index 8ed6215..4217a6c 100644 --- a/docs/plugins/data-designer-generalist-agent-env/usage.md +++ b/docs/plugins/data-designer-generalist-agent-env/usage.md @@ -1,8 +1,9 @@ # Usage -This example creates one Generalist agent environment tuple from a trip-planning -category. The same pattern works for other task categories where searching the -candidate space is harder than verifying a proposed answer. +This example creates one Generalist RL rollout tuple from generated data. The +model generates the topic, constraints, database schema, and database records. +The plugin assembles those generated artifacts, adds executable tools, and then +synthesizes a task, tool-only solution, and verifier. 
```python import pandas as pd @@ -10,26 +11,118 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.seed_source_dataframe import DataFrameSeedSource from data_designer.interface.data_designer import DataDesigner +constraint_schema = { + "type": "object", + "properties": { + "goal": {"type": "string"}, + "constraints": {"type": "array", "items": {"type": "string"}}, + "success_criteria": {"type": "array", "items": {"type": "string"}}, + "data_dimensions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["goal", "constraints", "success_criteria", "data_dimensions"], +} + +database_schema_format = { + "type": "object", + "properties": { + "record_type": {"type": "string"}, + "primary_key": {"type": "string"}, + "fields": {"type": "array", "items": {"type": "object"}}, + "attribute_fields": {"type": "array", "items": {"type": "object"}}, + }, + "required": ["record_type", "primary_key", "fields", "attribute_fields"], +} + +records_format = { + "type": "object", + "properties": { + "records": { + "type": "array", + "items": { + "type": "object", + "properties": { + "record_id": {"type": "string"}, + "name": {"type": "string"}, + "summary": {"type": "string"}, + "cost": {"type": "integer"}, + "duration": {"type": "integer"}, + "score": {"type": "integer"}, + "tags": {"type": "array", "items": {"type": "string"}}, + "attributes": {"type": "object"}, + }, + "required": ["record_id", "name", "summary", "cost", "duration", "score", "tags"], + }, + } + }, + "required": ["records"], +} + seed_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], - "constraints": ["family-friendly museums, moderate budget, reliable transport"], + "seed": ["planning a travel itinerary"], + "brief": ["family-friendly museums, moderate budget, reliable transport"], } ) builder = DataDesignerConfigBuilder() builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) builder.add_column( - name="agent_env", - 
column_type="generalist-agent-env", - task_category_column="category", - context_columns=["constraints"], + name="task_topic", + column_type="llm-text", + model_alias="deepseek-v4-pro-live", + prompt="From this seed {{ seed }}, write a concise task topic.", +) +builder.add_column( + name="task_constraints", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "For topic {{ task_topic }} and brief {{ brief }}, generate constraints " + "that make the task hard to solve but easy to verify." + ), + output_format=constraint_schema, +) +builder.add_column( + name="database_schema", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "Generate a database schema for topic {{ task_topic }} and constraints " + "{{ task_constraints }}. Include record_id, name, summary, cost, " + "duration, score, tags, and topic-specific attributes." + ), + output_format=database_schema_format, +) +builder.add_column( + name="database_records", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "Generate 8 diverse records that follow schema {{ database_schema }} " + "for topic {{ task_topic }} and constraints {{ task_constraints }}. " + "At least two records must include the tag reliable." 
+ ), + output_format=records_format, +) +builder.add_column( + name="agent_environment", + column_type="generalist-agent-environment", + task_topic_column="task_topic", + task_constraints_column="task_constraints", + database_schema_column="database_schema", + database_records_column="database_records", + context_columns=["brief"], +) +builder.add_column( + name="agent_task", + column_type="generalist-agent-task", + environment_column="agent_environment", difficulty="hard", - required_tag="family", + required_tag="reliable", ) result = DataDesigner(artifact_path="artifacts").preview(builder, num_records=1) -environment_tuple = result.dataset.loc[0, "agent_env"] +environment_tuple = result.dataset.loc[0, "agent_task"] ``` The generated row can be validated with the package helper: @@ -42,7 +135,37 @@ assert validation.passed, validation.errors assert validation.answer == environment_tuple["reference_answer"] ``` -The output task is intentionally search-like: the solving agent must inspect, -filter, and rank records through the tool interface. The verifier remains -straightforward because it checks fixed constraints and a deterministic -tie-break order directly against the database. +The output task is intentionally search-like: the solving agent must inspect the +generated schema, filter records, and rank candidates through the tool interface. +The verifier remains straightforward because it checks fixed constraints and a +deterministic tie-break order directly against the generated database. 
+ +## Expected output shape + +`generalist-agent-environment` emits: + +```text +schema_version +environment.database_schema +environment.database +environment.data_generation +tools +tool_module_source +synthesis_trace +``` + +`generalist-agent-task` emits: + +```text +schema_version +environment +tools +tool_module_source +task +solution +verifier +reference_answer +task_iterations +synthesis_trace +rl_filter_note +``` diff --git a/docs/plugins/index.md b/docs/plugins/index.md index 6349dd5..1c74551 100644 --- a/docs/plugins/index.md +++ b/docs/plugins/index.md @@ -10,10 +10,10 @@ Browse available Data Designer plugins by what they add to your data generation data-designer-generalist-agent-env v0.1.0 - Generalist agent environment tuple generator for Data Designer + Generalist agent environment and task assemblers for generated Data Designer data Column types - generalist-agent-env + generalist-agent-environmentgeneralist-agent-task diff --git a/plugins/data-designer-generalist-agent-env/README.md b/plugins/data-designer-generalist-agent-env/README.md index 76115dc..9f91ef3 100644 --- a/plugins/data-designer-generalist-agent-env/README.md +++ b/plugins/data-designer-generalist-agent-env/README.md @@ -1,8 +1,10 @@ # data-designer-generalist-agent-env -Generate Generalist-style agent environment tuples from seed task categories. -Each output value contains a sandbox database, synthesized task-specific tool -functions, a task prompt, a tool-only solution function, and a verifier function. +Generate Generalist-style agent environments and tasks from Data Designer +generated topics, constraints, database schemas, and records. The plugin +assembles generated grounding data into executable tool environments, then +generates task prompts, tool-only solution functions, and verifier functions +from those environments. 
## Installation @@ -12,18 +14,29 @@ uv add "data-designer>=0.5.9" data-designer-generalist-agent-env ## Usage -Once installed, the `generalist-agent-env` column type is automatically discovered by +Once installed, the `generalist-agent-environment` and +`generalist-agent-task` column types are automatically discovered by [NeMo Data Designer](https://github.com/NVIDIA-NeMo/DataDesigner). -Configure the column with a task category column and optional context columns: +Configure the workflow after generating a task topic, constraints, schema, and +records: ```python builder.add_column( - name="agent_env", - column_type="generalist-agent-env", - task_category_column="category", - context_columns=["constraints"], + name="agent_environment", + column_type="generalist-agent-environment", + task_topic_column="task_topic", + task_constraints_column="task_constraints", + database_schema_column="database_schema", + database_records_column="database_records", + context_columns=["brief"], +) +builder.add_column( + name="agent_task", + column_type="generalist-agent-task", + environment_column="agent_environment", difficulty="hard", + required_tag="reliable", ) ``` diff --git a/plugins/data-designer-generalist-agent-env/docs/index.md b/plugins/data-designer-generalist-agent-env/docs/index.md index 1880e4a..84d217c 100644 --- a/plugins/data-designer-generalist-agent-env/docs/index.md +++ b/plugins/data-designer-generalist-agent-env/docs/index.md @@ -1,13 +1,24 @@ # data-designer-generalist-agent-env -The `data-designer-generalist-agent-env` plugin adds a `generalist-agent-env` -column type for creating Generalist-style agent environment tuples inspired by -the DeepSeek-V3.2 automatic environment synthesis workflow. 
- -For each seed row, the plugin builds a row-local sandbox database, exposes -task-specific tool functions, synthesizes a constrained task, emits a solution -function that only calls tools and performs local logic, and emits a verifier -function that checks candidate answers against the database. +The `data-designer-generalist-agent-env` plugin adds a two-stage Generalist +environment workflow for Data Designer. It is designed for workflows where Data +Designer generates the topic, constraints, database schema, and database rows, +then the plugin assembles those generated artifacts into executable RL rollout +tuples. + +The workflow is: + +1. Use ordinary Data Designer columns, such as `llm-text` and `llm-structured`, + to generate a task topic and constraints. +2. Use additional Data Designer generation columns to generate a row-local + database schema and records that follow that schema. +3. Use `generalist-agent-environment` to validate and assemble the generated + schema and records into a sandbox with executable tools. +4. Use `generalist-agent-task` to synthesize the task prompt, tool-only solution, + verifier, reference answer, and simple-to-hard augmentation trace. + +No search provider or external retrieval step is required, and the plugin does +not fabricate fallback records. ## Installation @@ -15,18 +26,30 @@ function that checks candidate answers against the database. uv add "data-designer>=0.5.9" data-designer-generalist-agent-env ``` -## Column type +## Column types -Use the `generalist-agent-env` column type when a dataset needs structured -`` records for agent training or evaluation. +Use `generalist-agent-environment` to assemble the generated sandbox and toolset. | Field | Required | Description | | --- | --- | --- | -| `name` | Yes | Output column name. | -| `task_category_column` | Yes | Existing column containing a task category such as `planning a travel itinerary`. 
| -| `context_columns` | No | Existing columns copied into the synthesized sandbox database context. | +| `name` | Yes | Output environment column name. | +| `task_topic_column` | Yes | Existing column containing a generated task topic such as `trip planning`. | +| `task_constraints_column` | No | Existing column containing generated constraints as text, JSON, or a structured object. | +| `database_schema_column` | Yes | Existing column containing the generated database schema. | +| `database_records_column` | Yes | Existing column containing generated database records. | +| `context_columns` | No | Existing columns copied into environment context. | + +Generated records must include `record_id`, `name`, `summary`, `cost`, +`duration`, `score`, and `tags`. Additional fields are preserved, and an +`attributes` object is recommended for topic-specific fields. + +Use `generalist-agent-task` to generate tasks from an environment. + +| Field | Required | Description | +| --- | --- | --- | +| `name` | Yes | Output task tuple column name. | +| `environment_column` | Yes | Column containing a `generalist-agent-environment` artifact. | | `difficulty` | No | Final task difficulty: `simple`, `medium`, or `hard`; defaults to `hard`. | -| `database_size` | No | Number of sandbox records to synthesize per row; defaults to `8`. | | `required_tag` | No | Optional tag that the valid answer must contain. | | `max_cost` | No | Optional maximum cost constraint. Unsatisfiable values are repaired upward. | | `min_score` | No | Optional minimum score constraint. Unsatisfiable values are repaired downward. 
| @@ -40,35 +63,121 @@ from data_designer.config.seed_source_dataframe import DataFrameSeedSource seed_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], - "constraints": ["compare candidate plans by score, cost, and family suitability"], + "seed": ["travel planning"], + "brief": ["family-friendly museums, moderate budget, reliable transport"], } ) +constraint_schema = { + "type": "object", + "properties": { + "goal": {"type": "string"}, + "constraints": {"type": "array", "items": {"type": "string"}}, + "success_criteria": {"type": "array", "items": {"type": "string"}}, + "data_dimensions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["goal", "constraints", "success_criteria", "data_dimensions"], +} + +database_schema_format = { + "type": "object", + "properties": { + "record_type": {"type": "string"}, + "primary_key": {"type": "string", "const": "record_id"}, + "fields": {"type": "array", "items": {"type": "object"}}, + "attribute_fields": {"type": "array", "items": {"type": "object"}}, + }, + "required": ["record_type", "primary_key", "fields", "attribute_fields"], +} + +records_format = { + "type": "object", + "properties": { + "records": { + "type": "array", + "items": { + "type": "object", + "properties": { + "record_id": {"type": "string"}, + "name": {"type": "string"}, + "summary": {"type": "string"}, + "cost": {"type": "integer"}, + "duration": {"type": "integer"}, + "score": {"type": "integer"}, + "tags": {"type": "array", "items": {"type": "string"}}, + "attributes": {"type": "object"}, + }, + "required": ["record_id", "name", "summary", "cost", "duration", "score", "tags"], + }, + } + }, + "required": ["records"], +} + builder = DataDesignerConfigBuilder() builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) builder.add_column( - name="agent_env", - column_type="generalist-agent-env", - task_category_column="category", - context_columns=["constraints"], - required_tag="family", + name="task_topic", + 
column_type="llm-text", + model_alias="deepseek-v4-pro-live", + prompt="From {{ seed }} and {{ brief }}, write a concise task topic.", +) +builder.add_column( + name="task_constraints", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt="Generate constraints for topic {{ task_topic }} with brief {{ brief }}.", + output_format=constraint_schema, +) +builder.add_column( + name="database_schema", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt="Generate a database schema for topic {{ task_topic }} and constraints {{ task_constraints }}.", + output_format=database_schema_format, +) +builder.add_column( + name="database_records", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "Generate 8 records that follow schema {{ database_schema }} for topic " + "{{ task_topic }} and constraints {{ task_constraints }}. Include varied " + "cost, duration, score, tags, and attributes." + ), + output_format=records_format, +) +builder.add_column( + name="agent_environment", + column_type="generalist-agent-environment", + task_topic_column="task_topic", + task_constraints_column="task_constraints", + database_schema_column="database_schema", + database_records_column="database_records", + context_columns=["brief"], +) +builder.add_column( + name="agent_task", + column_type="generalist-agent-task", + environment_column="agent_environment", + difficulty="hard", + required_tag="reliable", ) ``` -The generated `agent_env` value is a dictionary with these top-level keys: +The generated `agent_task` value is a dictionary with these top-level keys: | Key | Description | | --- | --- | -| `environment` | Sandbox metadata, row-local database, schema, and source context. | +| `environment` | Sandbox metadata, generated database schema, generated records, and source context. | | `tools` | Synthesized tool descriptors and Python function sources. 
| -| `tool_module_source` | Executable Python source defining the hidden database and selected tools. | +| `tool_module_source` | Executable Python source defining the generated schema, generated database, and selected tools. | | `task` | Prompt, difficulty, constraints, and answer schema. | | `solution` | Python `solve(tools)` source restricted to tool calls and local logic. | | `verifier` | Python `verify(answer, database)` source and reference validation status. | | `reference_answer` | The generated solution output that the verifier accepts. | | `task_iterations` | Simple-to-final task, solution, verifier, and augmentation artifacts. | -| `synthesis_trace` | Environment construction, task synthesis, tool augmentation, solution, and verification events. | +| `synthesis_trace` | Topic/constraint intake, schema intake, generated-data intake, task synthesis, solution, and verification events. | ## Row validation helper @@ -79,13 +188,13 @@ solution, checks the generated verifier, and replays every task iteration: ```python from data_designer_generalist_agent_env.validation import verify_row_record -validation = verify_row_record(result.dataset.loc[0], output_column="agent_env") +validation = verify_row_record(result.dataset.loc[0], output_column="agent_task") assert validation.passed, validation.errors ``` -## Behavior Notes +## Behavior notes -The plugin is deterministic and does not call the Internet. It records `bash` -and `search` as base sandbox tools and uses the seed row to synthesize the -sandbox database locally. Downstream workflows can replace or augment that -database with retrieved records before using the generated task and verifier. +The plugin does not generate the grounding records. It requires generated +schema and generated records from upstream Data Designer columns, validates the +minimum executable contract, and then builds tools and verifiers around that +generated data. 
diff --git a/plugins/data-designer-generalist-agent-env/docs/usage.md b/plugins/data-designer-generalist-agent-env/docs/usage.md index 8ed6215..4217a6c 100644 --- a/plugins/data-designer-generalist-agent-env/docs/usage.md +++ b/plugins/data-designer-generalist-agent-env/docs/usage.md @@ -1,8 +1,9 @@ # Usage -This example creates one Generalist agent environment tuple from a trip-planning -category. The same pattern works for other task categories where searching the -candidate space is harder than verifying a proposed answer. +This example creates one Generalist RL rollout tuple from generated data. The +model generates the topic, constraints, database schema, and database records. +The plugin assembles those generated artifacts, adds executable tools, and then +synthesizes a task, tool-only solution, and verifier. ```python import pandas as pd @@ -10,26 +11,118 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder from data_designer.config.seed_source_dataframe import DataFrameSeedSource from data_designer.interface.data_designer import DataDesigner +constraint_schema = { + "type": "object", + "properties": { + "goal": {"type": "string"}, + "constraints": {"type": "array", "items": {"type": "string"}}, + "success_criteria": {"type": "array", "items": {"type": "string"}}, + "data_dimensions": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["goal", "constraints", "success_criteria", "data_dimensions"], +} + +database_schema_format = { + "type": "object", + "properties": { + "record_type": {"type": "string"}, + "primary_key": {"type": "string"}, + "fields": {"type": "array", "items": {"type": "object"}}, + "attribute_fields": {"type": "array", "items": {"type": "object"}}, + }, + "required": ["record_type", "primary_key", "fields", "attribute_fields"], +} + +records_format = { + "type": "object", + "properties": { + "records": { + "type": "array", + "items": { + "type": "object", + "properties": { + "record_id": {"type": 
"string"}, + "name": {"type": "string"}, + "summary": {"type": "string"}, + "cost": {"type": "integer"}, + "duration": {"type": "integer"}, + "score": {"type": "integer"}, + "tags": {"type": "array", "items": {"type": "string"}}, + "attributes": {"type": "object"}, + }, + "required": ["record_id", "name", "summary", "cost", "duration", "score", "tags"], + }, + } + }, + "required": ["records"], +} + seed_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], - "constraints": ["family-friendly museums, moderate budget, reliable transport"], + "seed": ["planning a travel itinerary"], + "brief": ["family-friendly museums, moderate budget, reliable transport"], } ) builder = DataDesignerConfigBuilder() builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) builder.add_column( - name="agent_env", - column_type="generalist-agent-env", - task_category_column="category", - context_columns=["constraints"], + name="task_topic", + column_type="llm-text", + model_alias="deepseek-v4-pro-live", + prompt="From this seed {{ seed }}, write a concise task topic.", +) +builder.add_column( + name="task_constraints", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "For topic {{ task_topic }} and brief {{ brief }}, generate constraints " + "that make the task hard to solve but easy to verify." + ), + output_format=constraint_schema, +) +builder.add_column( + name="database_schema", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "Generate a database schema for topic {{ task_topic }} and constraints " + "{{ task_constraints }}. Include record_id, name, summary, cost, " + "duration, score, tags, and topic-specific attributes." 
+ ), + output_format=database_schema_format, +) +builder.add_column( + name="database_records", + column_type="llm-structured", + model_alias="deepseek-v4-pro-live", + prompt=( + "Generate 8 diverse records that follow schema {{ database_schema }} " + "for topic {{ task_topic }} and constraints {{ task_constraints }}. " + "At least two records must include the tag reliable." + ), + output_format=records_format, +) +builder.add_column( + name="agent_environment", + column_type="generalist-agent-environment", + task_topic_column="task_topic", + task_constraints_column="task_constraints", + database_schema_column="database_schema", + database_records_column="database_records", + context_columns=["brief"], +) +builder.add_column( + name="agent_task", + column_type="generalist-agent-task", + environment_column="agent_environment", difficulty="hard", - required_tag="family", + required_tag="reliable", ) result = DataDesigner(artifact_path="artifacts").preview(builder, num_records=1) -environment_tuple = result.dataset.loc[0, "agent_env"] +environment_tuple = result.dataset.loc[0, "agent_task"] ``` The generated row can be validated with the package helper: @@ -42,7 +135,37 @@ assert validation.passed, validation.errors assert validation.answer == environment_tuple["reference_answer"] ``` -The output task is intentionally search-like: the solving agent must inspect, -filter, and rank records through the tool interface. The verifier remains -straightforward because it checks fixed constraints and a deterministic -tie-break order directly against the database. +The output task is intentionally search-like: the solving agent must inspect the +generated schema, filter records, and rank candidates through the tool interface. +The verifier remains straightforward because it checks fixed constraints and a +deterministic tie-break order directly against the generated database. 
+ +## Expected output shape + +`generalist-agent-environment` emits: + +```text +schema_version +environment.database_schema +environment.database +environment.data_generation +tools +tool_module_source +synthesis_trace +``` + +`generalist-agent-task` emits: + +```text +schema_version +environment +tools +tool_module_source +task +solution +verifier +reference_answer +task_iterations +synthesis_trace +rl_filter_note +``` diff --git a/plugins/data-designer-generalist-agent-env/pyproject.toml b/plugins/data-designer-generalist-agent-env/pyproject.toml index 14bf78a..ffc0d8e 100644 --- a/plugins/data-designer-generalist-agent-env/pyproject.toml +++ b/plugins/data-designer-generalist-agent-env/pyproject.toml @@ -4,7 +4,7 @@ [project] name = "data-designer-generalist-agent-env" version = "0.1.0" -description = "Generalist agent environment tuple generator for Data Designer" +description = "Generalist agent environment and task assemblers for generated Data Designer data" requires-python = ">=3.10" dependencies = [ "data-designer>=0.5.9", @@ -20,7 +20,8 @@ classifiers = [ ] [project.entry-points."data_designer.plugins"] -generalist-agent-env = "data_designer_generalist_agent_env.plugin:plugin" +generalist-agent-environment = "data_designer_generalist_agent_env.plugin:environment_plugin" +generalist-agent-task = "data_designer_generalist_agent_env.plugin:task_plugin" [project.urls] Repository = "https://github.com/NVIDIA-NeMo/DataDesignerPlugins" diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py index 214349d..6ade46a 100644 --- a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/config.py @@ -12,32 +12,173 @@ Difficulty = Literal["simple", "medium", "hard"] -class 
GeneralistAgentEnvColumnConfig(SingleColumnConfig): - """Configuration for synthesizing Generalist agent environment tuples. +def normalize_column_name(value: str, field_name: str) -> str: + """Normalize and validate one column name. - The generator consumes a task category column and optional context columns, - then writes one structured environment/task/verifier tuple per input row. + Args: + value: Candidate column name. + field_name: Name used in validation messages. + + Returns: + The stripped column name. + + Raises: + ValueError: If the column name is empty. + """ + value = value.strip() + if not value: + raise ValueError(f"{field_name} must not be empty") + return value + + +def normalize_context_columns(value: list[str]) -> list[str]: + """Validate and de-duplicate context column names. + + Args: + value: Candidate context column names. + + Returns: + Context column names with duplicates removed while preserving order. + + Raises: + ValueError: If any context column name is empty. """ + columns: list[str] = [] + for column in value: + column = normalize_column_name(column, "context_columns") + if column not in columns: + columns.append(column) + return columns + + +def normalize_required_tag(value: str | None) -> str | None: + """Normalize an optional required tag. - column_type: Literal["generalist-agent-env"] = "generalist-agent-env" + Args: + value: Candidate tag value. - task_category_column: str = Field( - description="Input column containing the task category, such as 'travel itinerary planning'.", + Returns: + A lower-cased tag, or ``None`` when unset. + + Raises: + ValueError: If the tag contains only whitespace. + """ + if value is None: + return None + value = value.strip().lower() + if not value: + raise ValueError("required_tag must not be empty when provided") + return value + + +class GeneralistAgentEnvironmentColumnConfig(SingleColumnConfig): + """Configuration for constructing generated Generalist sandbox environments. 
+ + The generator consumes Data Designer generated topic, constraints, database + schema, and database records, then emits a row-local environment with + executable tool implementations over those generated records. + """ + + column_type: Literal["generalist-agent-environment"] = "generalist-agent-environment" + + task_topic_column: str = Field( + description="Input column containing a generated task topic, such as 'trip planning'.", + ) + task_constraints_column: str | None = Field( + default=None, + description="Optional input column containing generated constraints as text, JSON, or a structured object.", + ) + database_schema_column: str = Field( + description="Input column containing the generated database schema for this row.", + ) + database_records_column: str = Field( + description="Input column containing generated database records for this row.", ) context_columns: list[str] = Field( default_factory=list, - description="Optional seed columns copied into the synthesized sandbox database context.", + description="Optional seed columns copied into environment context.", + ) + + @staticmethod + def get_column_emoji() -> str: + return "🧱" + + @field_validator("task_topic_column") + @classmethod + def validate_task_topic_column(cls, value: str) -> str: + """Validate the task topic source column name.""" + return normalize_column_name(value, "task_topic_column") + + @field_validator("task_constraints_column") + @classmethod + def validate_task_constraints_column(cls, value: str | None) -> str | None: + """Validate the optional task constraints source column name.""" + if value is None: + return None + return normalize_column_name(value, "task_constraints_column") + + @field_validator("database_schema_column") + @classmethod + def validate_database_schema_column(cls, value: str) -> str: + """Validate the generated database schema source column name.""" + return normalize_column_name(value, "database_schema_column") + + @field_validator("database_records_column") 
+ @classmethod + def validate_database_records_column(cls, value: str) -> str: + """Validate the generated database records source column name.""" + return normalize_column_name(value, "database_records_column") + + @field_validator("context_columns") + @classmethod + def validate_context_columns(cls, value: list[str]) -> list[str]: + """Validate context column names.""" + return normalize_context_columns(value) + + @model_validator(mode="after") + def validate_distinct_columns(self) -> Self: + """Validate cross-field column references.""" + named_columns = [ + self.task_topic_column, + self.database_schema_column, + self.database_records_column, + *self.context_columns, + ] + if self.task_constraints_column is not None: + named_columns.append(self.task_constraints_column) + if len(named_columns) != len(set(named_columns)): + raise ValueError( + "task_topic_column, task_constraints_column, database_schema_column, " + "database_records_column, and context_columns must be distinct" + ) + return self + + @property + def required_columns(self) -> list[str]: + columns = [self.task_topic_column] + if self.task_constraints_column is not None: + columns.append(self.task_constraints_column) + columns.extend([self.database_schema_column, self.database_records_column]) + columns.extend(self.context_columns) + return columns + + @property + def side_effect_columns(self) -> list[str]: + return [] + + +class GeneralistAgentTaskColumnConfig(SingleColumnConfig): + """Configuration for synthesizing tasks from generated environments.""" + + column_type: Literal["generalist-agent-task"] = "generalist-agent-task" + + environment_column: str = Field( + description="Column containing a generalist-agent-environment artifact.", ) difficulty: Difficulty = Field( default="hard", description="Final task difficulty to synthesize after the simple-to-hard iteration trace.", ) - database_size: int = Field( - default=8, - ge=3, - le=30, - description="Number of records to synthesize into the 
sandbox database for each row.", - ) required_tag: str | None = Field( default=None, description="Optional tag that every valid solution candidate must contain.", @@ -45,99 +186,34 @@ class GeneralistAgentEnvColumnConfig(SingleColumnConfig): max_cost: int | None = Field( default=None, ge=1, - description="Optional maximum cost constraint for the final task; repaired upward if it makes the task unsat.", + description="Optional maximum cost constraint. Unsatisfiable values are repaired upward.", ) min_score: int | None = Field( default=None, ge=0, le=100, - description="Optional minimum score constraint for the final task; repaired downward if it makes the task unsat.", + description="Optional minimum score constraint. Unsatisfiable values are repaired downward.", ) @staticmethod def get_column_emoji() -> str: - return "🧰" + return "🧪" - @field_validator("task_category_column") + @field_validator("environment_column") @classmethod - def validate_task_category_column(cls, value: str) -> str: - """Validate the task category source column name. - - Args: - value: Candidate column name. - - Returns: - The stripped column name. - - Raises: - ValueError: If the column name is empty. - """ - value = value.strip() - if not value: - raise ValueError("task_category_column must not be empty") - return value - - @field_validator("context_columns") - @classmethod - def validate_context_columns(cls, value: list[str]) -> list[str]: - """Validate and de-duplicate context column names. - - Args: - value: Candidate context column names. - - Returns: - Context column names with duplicates removed while preserving order. - - Raises: - ValueError: If any context column name is empty. 
- """ - columns: list[str] = [] - for column in value: - column = column.strip() - if not column: - raise ValueError("context_columns must not contain empty column names") - if column not in columns: - columns.append(column) - return columns + def validate_environment_column(cls, value: str) -> str: + """Validate the environment source column name.""" + return normalize_column_name(value, "environment_column") @field_validator("required_tag") @classmethod def validate_required_tag(cls, value: str | None) -> str | None: - """Normalize the optional required tag. - - Args: - value: Candidate tag value. - - Returns: - A lower-cased tag, or ``None`` when unset. - - Raises: - ValueError: If the tag contains only whitespace. - """ - if value is None: - return None - value = value.strip().lower() - if not value: - raise ValueError("required_tag must not be empty when provided") - return value - - @model_validator(mode="after") - def validate_distinct_columns(self) -> Self: - """Validate cross-field column references. - - Returns: - This config instance. - - Raises: - ValueError: If the category column is repeated as context. 
- """ - if self.task_category_column in self.context_columns: - raise ValueError("context_columns must not repeat task_category_column") - return self + """Normalize the optional required tag.""" + return normalize_required_tag(value) @property def required_columns(self) -> list[str]: - return [self.task_category_column, *self.context_columns] + return [self.environment_column] @property def side_effect_columns(self) -> list[str]: diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py index 429b3ba..7801b6d 100644 --- a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/impl.py @@ -8,42 +8,45 @@ import math import re import textwrap +from collections.abc import Mapping from pprint import pformat from typing import TYPE_CHECKING, Any from data_designer.engine.column_generators.generators.base import ColumnGeneratorFullColumn -from data_designer_generalist_agent_env.config import Difficulty, GeneralistAgentEnvColumnConfig +from data_designer_generalist_agent_env.config import ( + Difficulty, + GeneralistAgentEnvironmentColumnConfig, + GeneralistAgentTaskColumnConfig, +) if TYPE_CHECKING: import pandas as pd -BASE_SANDBOX_TOOLS = ["bash", "search"] -BASE_TAGS = [ - "budget", - "reliable", - "fast", - "verified", - "flexible", - "local", - "safe", - "ranked", -] +BASE_SANDBOX_TOOLS = ["data_designer_generated_schema", "data_designer_generated_records"] DIFFICULTY_ORDER: list[Difficulty] = ["simple", "medium", "hard"] - -DATABASE_SCHEMA = { - "record_id": "Stable row-local identifier.", - "name": "Human-readable option name.", - "category": "Task category supplied by the seed row.", - "summary": "Short synthesized description for search.", - "cost": "Integer cost proxy; lower is better.", - "duration": 
"Integer duration proxy.", - "score": "Integer quality score from 55 to 100; higher is better.", - "tags": "Searchable task-specific labels.", - "source_values": "Context columns copied from the seed row.", +REQUIRED_RECORD_FIELDS = ["record_id", "name", "summary", "cost", "duration", "score", "tags"] +DEFAULT_DATABASE_SCHEMA = { + "record_type": "generated_candidate", + "primary_key": "record_id", + "fields": [ + {"name": "record_id", "type": "string", "description": "Stable row-local identifier."}, + {"name": "name", "type": "string", "description": "Human-readable candidate name."}, + {"name": "summary", "type": "string", "description": "Short generated candidate description."}, + {"name": "cost", "type": "integer", "description": "Integer cost proxy; lower is better."}, + {"name": "duration", "type": "integer", "description": "Integer duration or effort proxy."}, + {"name": "score", "type": "integer", "description": "Integer quality score from 0 to 100; higher is better."}, + {"name": "tags", "type": "list[string]", "description": "Searchable task-specific labels."}, + {"name": "attributes", "type": "object", "description": "Topic-specific generated attributes."}, + ], } TOOL_FUNCTION_SOURCES = { + "describe_schema": ''' +def describe_schema(): + """Return the generated database schema.""" + return dict(DATABASE_SCHEMA) +''', "list_records": ''' def list_records(): """Return every record in the sandbox database.""" @@ -51,17 +54,20 @@ def list_records(): ''', "search_records": ''' def search_records(query="", max_results=10): - """Search database records by name, summary, category, or tag.""" + """Search database records by name, summary, topic, tag, or generated attribute.""" needle = str(query or "").casefold() limit = max(0, int(max_results)) matches = [] for record in DATABASE: + attributes = record.get("attributes", {}) + attribute_text = " ".join(str(value) for value in attributes.values()) if isinstance(attributes, dict) else "" haystack = " ".join( [ 
str(record.get("name", "")), str(record.get("summary", "")), - str(record.get("category", "")), + str(record.get("topic", "")), " ".join(str(tag) for tag in record.get("tags", [])), + attribute_text, ], ).casefold() if not needle or needle in haystack: @@ -103,14 +109,34 @@ def rank_records(records=None, metric="score", descending=True): } TOOL_DESCRIPTIONS = { - "list_records": "Inspect all rows in the hidden sandbox database.", - "search_records": "Retrieve category-relevant records through a search-style interface.", - "get_record": "Fetch one database record by identifier.", + "describe_schema": "Inspect the generated row-local database schema.", + "list_records": "Inspect all generated rows in the hidden sandbox database.", + "search_records": "Retrieve topic-relevant records through a search-style interface.", + "get_record": "Fetch one generated database record by identifier.", "filter_records": "Apply verifier-aligned constraints without exposing the database directly.", - "rank_records": "Rank candidate records for the final combinatorial selection step.", + "rank_records": "Rank generated candidate records for the final selection step.", } +def is_null_like(value: object) -> bool: + """Return whether a value is empty or pandas-null-like. + + Args: + value: Candidate cell value. + + Returns: + ``True`` when the value should be treated as missing. + """ + if value is None: + return True + if isinstance(value, float) and math.isnan(value): + return True + try: + return bool(value != value) + except (TypeError, ValueError): + return False + + def normalize_cell(value: object) -> str: """Normalize one pandas cell into a stable text value. @@ -120,15 +146,8 @@ def normalize_cell(value: object) -> str: Returns: A stripped string, or an empty string for null-like values. 
""" - if value is None: - return "" - if isinstance(value, float) and math.isnan(value): + if is_null_like(value): return "" - try: - if value != value: - return "" - except (TypeError, ValueError): - pass return str(value).strip() @@ -160,103 +179,314 @@ def stable_int(seed: str, modulo: int) -> int: return int(digest[:12], 16) % modulo -def unique_values(values: list[str]) -> list[str]: - """Return values with duplicates removed while preserving order. +def coerce_list_like(value: Any) -> list[Any] | None: + """Coerce common list-like values into a Python list. + + Args: + value: Candidate list-like value. + + Returns: + A Python list when coercion is possible, otherwise ``None``. + """ + if isinstance(value, list): + return value + if isinstance(value, tuple): + return list(value) + tolist = getattr(value, "tolist", None) + if callable(tolist): + converted = tolist() + if isinstance(converted, list): + return converted + return None + + +def to_plain_data(value: Any) -> Any: + """Convert nested array-like values into JSON-style Python containers. Args: - values: Candidate values. + value: Arbitrary generated artifact value. Returns: - De-duplicated values. + The value with mappings and list-like values recursively normalized. """ - result: list[str] = [] - for value in values: - if value and value not in result: - result.append(value) - return result + if isinstance(value, Mapping): + return {key: to_plain_data(nested_value) for key, nested_value in value.items()} + + values = coerce_list_like(value) + if values is not None: + return [to_plain_data(nested_value) for nested_value in values] + item = getattr(value, "item", None) + if callable(item) and not isinstance(value, (str, bytes)): + try: + return item() + except (TypeError, ValueError): + return value + return value -def context_tags(category: str, context_values: dict[str, str], required_tag: str | None) -> list[str]: - """Build a task-specific tag vocabulary. 
+ +def parse_generated_payload(value: Any, field_name: str) -> Any: + """Parse generated JSON-like cell values. Args: - category: Seed task category. - context_values: Row context values. - required_tag: Optional tag that must be present in the database. + value: Cell value generated by a previous Data Designer column. + field_name: Name used in validation errors. Returns: - A non-empty tag list used to populate synthesized records. + Parsed JSON-like data. + + Raises: + ValueError: If the value is missing or a JSON string cannot be parsed. """ - seed_text = " ".join([category, *context_values.values()]) - words = [word for word in re.findall(r"[a-z0-9]+", seed_text.lower()) if len(word) > 3] - tags = unique_values([required_tag or "", *words[:6], *BASE_TAGS]) - return tags[:12] + if is_null_like(value): + msg = f"{field_name} must not be empty" + raise ValueError(msg) + if isinstance(value, str): + stripped = value.strip() + if not stripped: + msg = f"{field_name} must not be empty" + raise ValueError(msg) + try: + return json.loads(stripped) + except json.JSONDecodeError: + return stripped + return to_plain_data(value) -def build_context_summary(context_values: dict[str, str]) -> str: - """Summarize row context for record descriptions. +def constraint_payload_to_text(value: Any) -> str: + """Flatten a generated constraints payload into compact text. Args: - context_values: Context columns extracted from the seed row. + value: Constraint value from a row. Supported values include strings, + mappings, lists, JSON strings, and scalar values. Returns: - Compact text suitable for generated summaries. + Text used for environment provenance. 
""" - if not context_values: - return "seed category only" - return "; ".join(f"{name}: {value}" for name, value in context_values.items() if value) or "empty context" + if is_null_like(value): + return "" + if isinstance(value, str): + stripped = value.strip() + if not stripped: + return "" + try: + return constraint_payload_to_text(json.loads(stripped)) + except json.JSONDecodeError: + return stripped -def build_database( - category: str, - context_values: dict[str, str], - database_size: int, - required_tag: str | None, -) -> list[dict[str, Any]]: - """Synthesize a row-local sandbox database. - - Args: - category: Seed task category. - context_values: Optional context copied from the input row. - database_size: Number of database records to create. - required_tag: Optional tag that must be inserted into at least one record. - - Returns: - JSON-compatible database records. - """ - category_slug = slugify(category, "task") - seed_context = json.dumps(context_values, sort_keys=True) - tags = context_tags(category, context_values, required_tag) - context_summary = build_context_summary(context_values) - records: list[dict[str, Any]] = [] - - for position in range(database_size): - record_seed = f"{category}|{seed_context}|{position}" - cost = 80 + stable_int(f"{record_seed}|cost", 920) - duration = 1 + stable_int(f"{record_seed}|duration", 14) - score = 55 + stable_int(f"{record_seed}|score", 46) - tag_start = stable_int(f"{record_seed}|tags", len(tags)) - record_tags = [tags[(tag_start + offset) % len(tags)] for offset in range(min(3, len(tags)))] - if required_tag and position == 0 and required_tag not in record_tags: - record_tags[0] = required_tag - name = f"{category.title()} Option {position + 1}" - records.append( - { - "record_id": f"{category_slug}-{position + 1:03d}", - "name": name, - "category": category, - "summary": f"{name} synthesized from {context_summary}.", - "cost": cost, - "duration": duration, - "score": score, - "tags": 
unique_values(record_tags), - "source_values": dict(context_values), - } - ) + plain = to_plain_data(value) + if isinstance(plain, Mapping): + parts = [] + for key, nested_value in plain.items(): + nested_text = constraint_payload_to_text(nested_value) + if nested_text: + parts.append(f"{key}: {nested_text}") + return "; ".join(parts) + + values = coerce_list_like(plain) + if values is not None: + return "; ".join(text for text in (constraint_payload_to_text(item) for item in values) if text) + + return normalize_cell(plain) + + +def normalize_database_schema(value: Any) -> dict[str, Any]: + """Normalize a generated database schema payload. + + Args: + value: Generated schema value from a row. + + Returns: + Schema metadata as a dictionary. + + Raises: + ValueError: If the schema is not mapping-like. + """ + parsed = parse_generated_payload(value, "database_schema") + if not isinstance(parsed, Mapping): + msg = "database_schema must be a mapping generated by an upstream Data Designer column" + raise ValueError(msg) + schema = dict(parsed) + if "record_type" not in schema: + schema["record_type"] = DEFAULT_DATABASE_SCHEMA["record_type"] + if "primary_key" not in schema: + schema["primary_key"] = DEFAULT_DATABASE_SCHEMA["primary_key"] + if "fields" not in schema: + schema["fields"] = DEFAULT_DATABASE_SCHEMA["fields"] + return schema + + +def extract_records_payload(value: Any) -> list[Any]: + """Extract generated record payloads from common structured output shapes. + + Args: + value: Generated records value from a row. + + Returns: + List of record payloads. + + Raises: + ValueError: If no list-like records can be extracted. 
+ """ + parsed = parse_generated_payload(value, "database_records") + if isinstance(parsed, Mapping): + for key in ("records", "items", "data", "rows"): + nested = parsed.get(key) + if nested is not None: + records = coerce_list_like(nested) + if records is not None: + return records + msg = "database_records mapping must contain a records, items, data, or rows list" + raise ValueError(msg) + + records = coerce_list_like(parsed) + if records is None: + msg = "database_records must be a list or an object containing a records list" + raise ValueError(msg) return records +def normalize_tags(value: Any, record_id: str) -> list[str]: + """Normalize generated record tags. + + Args: + value: Generated tags value. + record_id: Record id used in errors. + + Returns: + List of tag strings. + + Raises: + ValueError: If tags are missing or cannot be interpreted as a non-empty list. + """ + tags = coerce_list_like(value) + if tags is None and isinstance(value, str): + tags = [tag.strip() for tag in re.split(r"[,;]", value) if tag.strip()] + if tags is None or not tags: + msg = f"generated record {record_id!r} must include at least one tag" + raise ValueError(msg) + return [str(tag).strip().lower() for tag in tags if str(tag).strip()] + + +def normalize_int_field(value: Any, field_name: str, record_id: str) -> int: + """Normalize an integer field from a generated record. + + Args: + value: Generated field value. + field_name: Field name. + record_id: Record id used in errors. + + Returns: + Integer field value. + + Raises: + ValueError: If the value cannot be converted to an integer. + """ + try: + return int(value) + except (TypeError, ValueError) as exc: + msg = f"generated record {record_id!r} field {field_name!r} must be an integer" + raise ValueError(msg) from exc + + +def normalize_generated_record(value: Any, index: int, topic: str) -> dict[str, Any]: + """Normalize and validate one generated database record. + + Args: + value: Generated record value. 
+ index: Zero-based record index. + topic: Generated task topic. + + Returns: + Normalized record. + + Raises: + ValueError: If required fields are absent or invalid. + """ + value = to_plain_data(value) + if not isinstance(value, Mapping): + msg = f"database_records[{index}] must be a mapping" + raise ValueError(msg) + + record = dict(value) + missing = [field for field in REQUIRED_RECORD_FIELDS if field not in record] + if missing: + msg = f"database_records[{index}] is missing required fields: {', '.join(missing)}" + raise ValueError(msg) + + record_id = str(record["record_id"]).strip() + if not record_id: + msg = f"database_records[{index}].record_id must not be empty" + raise ValueError(msg) + + normalized = dict(record) + normalized["record_id"] = record_id + normalized["name"] = str(record["name"]).strip() + normalized["summary"] = str(record["summary"]).strip() + normalized["topic"] = str(record.get("topic") or topic) + normalized["cost"] = normalize_int_field(record["cost"], "cost", record_id) + normalized["duration"] = normalize_int_field(record["duration"], "duration", record_id) + normalized["score"] = normalize_int_field(record["score"], "score", record_id) + normalized["tags"] = normalize_tags(record["tags"], record_id) + attributes = record.get("attributes", {}) + normalized["attributes"] = dict(attributes) if isinstance(attributes, Mapping) else {"value": attributes} + return normalized + + +def normalize_database_records(value: Any, topic: str | None = None) -> list[dict[str, Any]]: + """Normalize generated records restored from memory or saved artifacts. + + Args: + value: Database records payload. + topic: Generated topic used when records omit a topic field. + + Returns: + Database records as plain dictionaries. + + Raises: + ValueError: If records are absent or invalid. 
+ """ + records = extract_records_payload(value) + if not records: + msg = "database_records must contain at least one generated record" + raise ValueError(msg) + topic = topic or "general task" + normalized = [normalize_generated_record(record, index, topic) for index, record in enumerate(records)] + duplicate_ids = sorted( + { + record["record_id"] + for record in normalized + if [candidate["record_id"] for candidate in normalized].count(record["record_id"]) > 1 + } + ) + if duplicate_ids: + msg = f"database_records contain duplicate record_id values: {', '.join(duplicate_ids)}" + raise ValueError(msg) + return normalized + + +def validate_schema_covers_records(schema: Mapping[str, Any], records: list[dict[str, Any]]) -> None: + """Validate that generated records are compatible with the generated schema. + + Args: + schema: Generated schema metadata. + records: Normalized generated records. + + Raises: + ValueError: If the schema primary key is incompatible with records. + """ + primary_key = str(schema.get("primary_key", "record_id")) + if primary_key != "record_id": + msg = "generated database schema primary_key must be 'record_id'" + raise ValueError(msg) + for field in REQUIRED_RECORD_FIELDS: + if field not in records[0]: + msg = f"generated records must include required field {field!r}" + raise ValueError(msg) + + def record_matches_constraints(record: dict[str, Any], constraints: dict[str, Any]) -> bool: """Return whether a record satisfies task constraints. @@ -304,14 +534,14 @@ def select_best_record(records: list[dict[str, Any]]) -> dict[str, Any] | None: def default_constraints( database: list[dict[str, Any]], - config: GeneralistAgentEnvColumnConfig, + config: GeneralistAgentTaskColumnConfig, difficulty: Difficulty | None = None, ) -> dict[str, Any]: """Create feasible default constraints for the requested difficulty. Args: database: Sandbox database records. - config: Column configuration. + config: Task column configuration. 
difficulty: Difficulty to synthesize; defaults to the configured final difficulty. Returns: @@ -390,7 +620,7 @@ def selected_tool_names(difficulty: Difficulty) -> list[str]: Returns: Tool names to expose to the solution function. """ - tool_names = ["list_records", "search_records", "get_record"] + tool_names = ["describe_schema", "list_records", "search_records", "get_record"] if difficulty in ("medium", "hard"): tool_names.append("filter_records") if difficulty == "hard": @@ -417,26 +647,32 @@ def build_tool_specs(tool_names: list[str]) -> list[dict[str, str]]: ] -def build_tool_module_source(database: list[dict[str, Any]], tool_names: list[str]) -> str: - """Build executable Python source for the synthesized tool module. +def build_tool_module_source( + database_schema: Mapping[str, Any], database: list[dict[str, Any]], tool_names: list[str] +) -> str: + """Build executable Python source for the generated tool module. Args: - database: Hidden sandbox database. + database_schema: Generated row-local database schema. + database: Generated sandbox database. tool_names: Selected tool names. Returns: - Python module source defining ``DATABASE`` and tool functions. + Python module source defining ``DATABASE_SCHEMA``, ``DATABASE``, and tool functions. """ - parts = [f"DATABASE = {pformat(database, sort_dicts=False, width=120)}"] + parts = [ + f"DATABASE_SCHEMA = {pformat(dict(database_schema), sort_dicts=False, width=120)}", + f"DATABASE = {pformat(database, sort_dicts=False, width=120)}", + ] parts.extend(textwrap.dedent(TOOL_FUNCTION_SOURCES[tool_name]).strip() for tool_name in tool_names) return "\n\n".join(parts) + "\n" -def build_task_prompt(category: str, difficulty: Difficulty, constraints: dict[str, Any]) -> str: +def build_task_prompt(topic: str, difficulty: Difficulty, constraints: dict[str, Any]) -> str: """Create the task prompt presented to a solving agent. Args: - category: Seed task category. + topic: Generated task topic. 
difficulty: Final task difficulty. constraints: Task constraints. @@ -444,7 +680,8 @@ def build_task_prompt(category: str, difficulty: Difficulty, constraints: dict[s Natural language task prompt. """ clauses = [ - f"Use the synthesized tools to solve this {difficulty} {category!r} task.", + f"Use the synthesized tools to solve this {difficulty} {topic!r} task.", + "Inspect the generated schema and records through the tool interface; do not access the database directly.", "Return the record_id for the eligible database record with the highest score.", f"Only consider records with cost <= {constraints['max_cost']} and score >= {constraints['min_score']}.", ] @@ -512,6 +749,7 @@ def build_solution_source(constraints: dict[str, Any], difficulty: Difficulty) - lines = [ "def solve(tools):", ' """Solve the task using only synthesized tool functions and local logic."""', + ' tools["describe_schema"]()', f" required_tag = {required_tag}", ] @@ -624,17 +862,17 @@ def verify(answer, database): def build_task_iteration( - category: str, + topic: str, database: list[dict[str, Any]], - config: GeneralistAgentEnvColumnConfig, + config: GeneralistAgentTaskColumnConfig, difficulty: Difficulty, ) -> dict[str, Any]: """Build one synthesized task, solution, and verifier iteration. Args: - category: Seed task category. + topic: Generated task topic. database: Sandbox database records. - config: Column configuration. + config: Task column configuration. difficulty: Difficulty level for this iteration. 
Returns: @@ -646,7 +884,7 @@ def build_task_iteration( return { "difficulty": difficulty, "tool_names": selected_tool_names(difficulty), - "task_prompt": build_task_prompt(category, difficulty, constraints), + "task_prompt": build_task_prompt(topic, difficulty, constraints), "constraints": constraints, "solution_source": build_solution_source(constraints, difficulty), "verifier_source": build_verifier_source(constraints), @@ -656,50 +894,49 @@ def build_task_iteration( } +def difficulty_trace(final_difficulty: Difficulty) -> list[Difficulty]: + """List difficulty levels synthesized before the final task. + + Args: + final_difficulty: Requested final difficulty. + + Returns: + Ordered difficulty names through the final level. + """ + return DIFFICULTY_ORDER[: DIFFICULTY_ORDER.index(final_difficulty) + 1] + + def build_task_iterations( - category: str, + topic: str, database: list[dict[str, Any]], - config: GeneralistAgentEnvColumnConfig, + config: GeneralistAgentTaskColumnConfig, ) -> list[dict[str, Any]]: """Build the simple-to-final task synthesis iterations. Args: - category: Seed task category. + topic: Generated task topic. database: Sandbox database records. - config: Column configuration. + config: Task column configuration. Returns: Ordered task iteration artifacts. """ return [ - build_task_iteration(category, database, config, difficulty) - for difficulty in difficulty_trace(config.difficulty) + build_task_iteration(topic, database, config, difficulty) for difficulty in difficulty_trace(config.difficulty) ] -def difficulty_trace(final_difficulty: Difficulty) -> list[Difficulty]: - """List difficulty levels synthesized before the final task. - - Args: - final_difficulty: Requested final difficulty. - - Returns: - Ordered difficulty names through the final level. 
- """ - return DIFFICULTY_ORDER[: DIFFICULTY_ORDER.index(final_difficulty) + 1] - - -def build_synthesis_trace( - category: str, +def build_task_synthesis_trace( + topic: str, difficulty: Difficulty, tool_names: list[str], constraints: dict[str, Any], verified: bool, ) -> list[dict[str, Any]]: - """Describe the Generalist-style synthesis workflow for one row. + """Describe the task synthesis workflow for one row. Args: - category: Seed task category. + topic: Generated task topic. difficulty: Final task difficulty. tool_names: Synthesized tool names. constraints: Final task constraints. @@ -708,20 +945,13 @@ def build_synthesis_trace( Returns: Ordered workflow events. """ - trace: list[dict[str, Any]] = [ - { - "stage": "environment_and_toolset_construction", - "category": category, - "sandbox_tools": list(BASE_SANDBOX_TOOLS), - "database_created": True, - } - ] + trace: list[dict[str, Any]] = [] for level in difficulty_trace(difficulty): trace.append( { "stage": "task_synthesis", "difficulty": level, - "goal": "hard to solve through search, easy to verify by deterministic constraints", + "goal": "hard to solve through tools, easy to verify by deterministic constraints", } ) if level in ("medium", "hard"): @@ -735,6 +965,7 @@ def build_synthesis_trace( trace.append( { "stage": "solution_generation", + "topic": topic, "solution_restriction": "solution source calls synthesized tools and uses local logical computation only", "final_tools": tool_names, } @@ -749,60 +980,127 @@ def build_synthesis_trace( return trace -def build_environment_tuple( - category: str, +def build_environment_id(topic: str, context_values: dict[str, str], row_number: int) -> str: + """Build a stable row-local environment identifier. + + Args: + topic: Generated task topic. + context_values: Context copied from the seed row. + row_number: Zero-based row position. + + Returns: + Stable environment identifier. 
+ """ + topic_slug = slugify(topic, "task") + context_slug = stable_int(json.dumps(context_values, sort_keys=True), 10_000) + return f"{topic_slug}-{row_number + 1:04d}-{context_slug:04d}" + + +def build_environment_artifact( + topic: str, + constraints_payload: Any, + constraints_text: str, context_values: dict[str, str], - config: GeneralistAgentEnvColumnConfig, + database_schema: dict[str, Any], + database: list[dict[str, Any]], row_number: int, ) -> dict[str, Any]: - """Build one ```` tuple. + """Build one standalone generated environment and toolset artifact. Args: - category: Seed task category. + topic: Generated task topic. + constraints_payload: Raw generated constraints payload normalized to JSON-like data. + constraints_text: Generated constraints flattened to text. context_values: Context copied from the seed row. - config: Column configuration. + database_schema: Generated database schema. + database: Generated database records. row_number: Zero-based row position used for stable ids. Returns: - Structured Generalist environment tuple. + Structured Generalist environment artifact. 
""" - database = build_database(category, context_values, config.database_size, config.required_tag) - task_iterations = build_task_iterations(category, database, config) - final_iteration = task_iterations[-1] - constraints = final_iteration["constraints"] - answer = final_iteration["reference_answer"] - verified = bool(final_iteration["reference_solution_passed"]) - tool_names = final_iteration["tool_names"] - category_slug = slugify(category, "task") - context_slug = stable_int(json.dumps(context_values, sort_keys=True), 10_000) - environment_id = f"{category_slug}-{row_number + 1:04d}-{context_slug:04d}" - + validate_schema_covers_records(database_schema, database) + environment_id = build_environment_id(topic, context_values, row_number) + tool_names = selected_tool_names("hard") return { - "schema_version": "generalist-agent-env/v1", - "source_workflow": "DeepSeek-V3.2 Generalist automatic environment synthesis", + "schema_version": "generalist-agent-environment/v1", + "source_workflow": "Generated Generalist environment and toolset assembly", "environment": { "environment_id": environment_id, - "category": category, + "topic": topic, "sandbox": { "base_tools": list(BASE_SANDBOX_TOOLS), "database_name": f"{environment_id}_db", - "database_schema": dict(DATABASE_SCHEMA), }, + "database_schema": database_schema, "database": database, "database_record_count": len(database), + "task_constraints": constraints_payload, + "task_constraints_text": constraints_text, "source_context": dict(context_values), - "data_acquisition": { - "mode": "synthetic", - "base_sandbox_tools": list(BASE_SANDBOX_TOOLS), - "note": "Records are generated locally from seed data; downstream workflows may replace them with search-retrieved records.", + "data_generation": { + "mode": "generated_by_data_designer_columns", + "note": "Topic, constraints, schema, and records are generated upstream by Data Designer columns.", }, }, "tools": build_tool_specs(tool_names), - "tool_module_source": 
build_tool_module_source(database, tool_names), + "tool_module_source": build_tool_module_source(database_schema, database, tool_names), + "synthesis_trace": [ + { + "stage": "topic_and_constraint_intake", + "topic": topic, + "constraints_available": bool(constraints_text), + }, + { + "stage": "schema_intake", + "record_type": database_schema.get("record_type"), + "primary_key": database_schema.get("primary_key"), + }, + { + "stage": "generated_data_intake", + "database_record_count": len(database), + "toolset": tool_names, + }, + ], + } + + +def build_task_tuple( + environment_artifact: dict[str, Any], + config: GeneralistAgentTaskColumnConfig, +) -> dict[str, Any]: + """Build one ```` tuple from an environment. + + Args: + environment_artifact: Output from ``generalist-agent-environment``. + config: Task column configuration. + + Returns: + Structured Generalist task tuple. + """ + environment = dict(environment_artifact["environment"]) + database_schema = normalize_database_schema(environment["database_schema"]) + topic = str(environment.get("topic") or "general task") + database = normalize_database_records(environment["database"], topic) + environment["database_schema"] = database_schema + environment["database"] = database + task_iterations = build_task_iterations(topic, database, config) + final_iteration = task_iterations[-1] + constraints = final_iteration["constraints"] + answer = final_iteration["reference_answer"] + verified = bool(final_iteration["reference_solution_passed"]) + tool_names = final_iteration["tool_names"] + + return { + "schema_version": "generalist-agent-task/v1", + "source_workflow": "Generalist task synthesis from generated environment", + "environment": environment, + "tools": build_tool_specs(tool_names), + "tool_module_source": build_tool_module_source(database_schema, database, tool_names), "task": { "difficulty": config.difficulty, - "category": category, - "prompt": build_task_prompt(category, config.difficulty, constraints), + 
"topic": topic, + "prompt": build_task_prompt(topic, config.difficulty, constraints), "constraints": constraints, "answer_schema": { "record_id": "string or null", @@ -829,31 +1127,76 @@ def build_environment_tuple( }, "reference_answer": answer, "task_iterations": task_iterations, - "synthesis_trace": build_synthesis_trace(category, config.difficulty, tool_names, constraints, verified), + "synthesis_trace": [ + *environment_artifact.get("synthesis_trace", []), + *build_task_synthesis_trace(topic, config.difficulty, tool_names, constraints, verified), + ], "rl_filter_note": "Downstream RL retention can keep generated tuples with non-zero pass@100.", } -class GeneralistAgentEnvColumnGenerator(ColumnGeneratorFullColumn[GeneralistAgentEnvColumnConfig]): - """Generate Generalist agent environment tuples for each input row.""" +class GeneralistAgentEnvironmentColumnGenerator(ColumnGeneratorFullColumn[GeneralistAgentEnvironmentColumnConfig]): + """Assemble generated Generalist environment and toolset artifacts.""" def generate(self, data: pd.DataFrame) -> pd.DataFrame: - """Generate structured environment tuples. + """Generate environment artifacts from upstream generated schema and records. Args: - data: Input DataFrame containing the configured task category and context columns. + data: Input DataFrame containing generated task topic, optional + generated constraints, generated database schema, generated + database records, and optional context columns. Returns: The input DataFrame with the configured output column populated. 
""" - tuples: list[dict[str, Any]] = [] + artifacts: list[dict[str, Any]] = [] for row_number, (_, row) in enumerate(data.iterrows()): - category = normalize_cell(row[self.config.task_category_column]) or "general task" + topic = normalize_cell(row[self.config.task_topic_column]) or "general task" + constraints_cell = ( + row[self.config.task_constraints_column] if self.config.task_constraints_column is not None else None + ) + constraints_payload = to_plain_data(constraints_cell) if constraints_cell is not None else {} + constraints_text = constraint_payload_to_text(constraints_cell) + database_schema = normalize_database_schema(row[self.config.database_schema_column]) + database = normalize_database_records(row[self.config.database_records_column], topic) context_values = { column: normalize_cell(row[column]) for column in self.config.context_columns if normalize_cell(row[column]) } - tuples.append(build_environment_tuple(category, context_values, self.config, row_number)) + artifacts.append( + build_environment_artifact( + topic, + constraints_payload, + constraints_text, + context_values, + database_schema, + database, + row_number, + ) + ) + data[self.config.name] = artifacts + return data + + +class GeneralistAgentTaskColumnGenerator(ColumnGeneratorFullColumn[GeneralistAgentTaskColumnConfig]): + """Generate Generalist task tuples from constructed generated environments.""" + + def generate(self, data: pd.DataFrame) -> pd.DataFrame: + """Generate task, solution, and verifier tuples from environments. + + Args: + data: Input DataFrame containing the configured environment column. + + Returns: + The input DataFrame with the configured output column populated. 
+ """ + tuples: list[dict[str, Any]] = [] + for _, row in data.iterrows(): + environment_artifact = row[self.config.environment_column] + if not isinstance(environment_artifact, dict): + msg = f"{self.config.environment_column!r} must contain environment artifact dictionaries" + raise ValueError(msg) + tuples.append(build_task_tuple(environment_artifact, self.config)) data[self.config.name] = tuples return data diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py index c1f5f3c..b4dbec8 100644 --- a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/plugin.py @@ -3,8 +3,14 @@ from data_designer.plugins.plugin import Plugin, PluginType -plugin = Plugin( - config_qualified_name="data_designer_generalist_agent_env.config.GeneralistAgentEnvColumnConfig", - impl_qualified_name="data_designer_generalist_agent_env.impl.GeneralistAgentEnvColumnGenerator", +environment_plugin = Plugin( + config_qualified_name="data_designer_generalist_agent_env.config.GeneralistAgentEnvironmentColumnConfig", + impl_qualified_name="data_designer_generalist_agent_env.impl.GeneralistAgentEnvironmentColumnGenerator", + plugin_type=PluginType.COLUMN_GENERATOR, +) + +task_plugin = Plugin( + config_qualified_name="data_designer_generalist_agent_env.config.GeneralistAgentTaskColumnConfig", + impl_qualified_name="data_designer_generalist_agent_env.impl.GeneralistAgentTaskColumnGenerator", plugin_type=PluginType.COLUMN_GENERATOR, ) diff --git a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py index a14e213..e824ec2 100644 --- 
a/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py +++ b/plugins/data-designer-generalist-agent-env/src/data_designer_generalist_agent_env/validation.py @@ -248,6 +248,8 @@ def tool_output_error(tool_name: str, output: Any) -> str | None: Returns: An error message when the output shape is unexpected, otherwise ``None``. """ + if tool_name == "describe_schema" and not isinstance(output, dict): + return f"{tool_name} returned {type(output).__name__}; expected dict" if tool_name in {"list_records", "search_records", "filter_records", "rank_records"}: if not isinstance(output, list): return f"{tool_name} returned {type(output).__name__}; expected list" @@ -273,6 +275,8 @@ def invoke_tool_for_smoke_check( Returns: The tool output. """ + if tool_name == "describe_schema": + return tool() if tool_name == "list_records": return tool() if tool_name == "search_records": @@ -475,7 +479,7 @@ def verify_environment_tuple(environment_tuple: Mapping[str, Any]) -> RowRecordV replays every artifact in ``task_iterations`` when present. Args: - environment_tuple: Generated ``generalist-agent-env`` output value. + environment_tuple: Generated ``generalist-agent-task`` output value. Returns: A structured validation result with per-artifact status and errors. 
diff --git a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py index 4ad4c81..d3eee7c 100644 --- a/plugins/data-designer-generalist-agent-env/tests/test_plugin.py +++ b/plugins/data-designer-generalist-agent-env/tests/test_plugin.py @@ -12,72 +12,191 @@ from data_designer.interface.data_designer import DataDesigner from pydantic import ValidationError -from data_designer_generalist_agent_env.config import GeneralistAgentEnvColumnConfig +from data_designer_generalist_agent_env.config import ( + GeneralistAgentEnvironmentColumnConfig, + GeneralistAgentTaskColumnConfig, +) from data_designer_generalist_agent_env.impl import ( - GeneralistAgentEnvColumnGenerator, - build_environment_tuple, + GeneralistAgentEnvironmentColumnGenerator, + GeneralistAgentTaskColumnGenerator, + build_environment_artifact, build_reference_answer, + build_task_tuple, default_constraints, selected_tool_names, ) -from data_designer_generalist_agent_env.plugin import plugin +from data_designer_generalist_agent_env.plugin import environment_plugin, task_plugin from data_designer_generalist_agent_env.validation import verify_environment_tuple, verify_row_record +def generated_schema() -> dict: + """Return a representative upstream-generated database schema.""" + return { + "record_type": "trip_candidate", + "primary_key": "record_id", + "fields": [ + {"name": "record_id", "type": "string"}, + {"name": "name", "type": "string"}, + {"name": "summary", "type": "string"}, + {"name": "cost", "type": "integer"}, + {"name": "duration", "type": "integer"}, + {"name": "score", "type": "integer"}, + {"name": "tags", "type": "list[string]"}, + {"name": "attributes", "type": "object"}, + ], + "attribute_fields": [ + {"name": "hotel_fit", "type": "integer"}, + {"name": "transport_risk", "type": "integer"}, + {"name": "restaurant_quality", "type": "integer"}, + ], + } + + +def generated_records() -> list[dict]: + """Return 
representative upstream-generated database records.""" + return [ + { + "record_id": "trip-001", + "name": "Museum Rail Plan", + "summary": "Generated itinerary candidate with reliable transit and moderate cost.", + "cost": 240, + "duration": 3, + "score": 92, + "tags": ["reliable", "museum", "budget"], + "attributes": {"hotel_fit": 88, "transport_risk": 12, "restaurant_quality": 82}, + }, + { + "record_id": "trip-002", + "name": "Luxury Dining Plan", + "summary": "Generated itinerary candidate with high restaurant quality and higher cost.", + "cost": 520, + "duration": 3, + "score": 97, + "tags": ["restaurant", "premium", "ranked"], + "attributes": {"hotel_fit": 90, "transport_risk": 18, "restaurant_quality": 96}, + }, + { + "record_id": "trip-003", + "name": "Compact Family Plan", + "summary": "Generated itinerary candidate that balances family activities and reliable transport.", + "cost": 180, + "duration": 2, + "score": 95, + "tags": ["reliable", "family", "verified"], + "attributes": {"hotel_fit": 91, "transport_risk": 10, "restaurant_quality": 80}, + }, + ] + + def test_valid_plugin() -> None: - assert_valid_plugin(plugin) + assert_valid_plugin(environment_plugin) + assert_valid_plugin(task_plugin) -def make_generator(config: GeneralistAgentEnvColumnConfig) -> GeneralistAgentEnvColumnGenerator: - """Create a generator instance without requiring a ResourceProvider.""" - generator = GeneralistAgentEnvColumnGenerator.__new__(GeneralistAgentEnvColumnGenerator) +def make_environment_generator( + config: GeneralistAgentEnvironmentColumnConfig, +) -> GeneralistAgentEnvironmentColumnGenerator: + """Create an environment generator instance without requiring a ResourceProvider.""" + generator = GeneralistAgentEnvironmentColumnGenerator.__new__(GeneralistAgentEnvironmentColumnGenerator) generator._config = config return generator +def make_task_generator(config: GeneralistAgentTaskColumnConfig) -> GeneralistAgentTaskColumnGenerator: + """Create a task generator 
instance without requiring a ResourceProvider.""" + generator = GeneralistAgentTaskColumnGenerator.__new__(GeneralistAgentTaskColumnGenerator) + generator._config = config + return generator + + +def build_valid_task_tuple() -> dict: + """Build a representative valid task tuple for validation tests.""" + task_config = GeneralistAgentTaskColumnConfig( + name="agent_task", + environment_column="agent_environment", + difficulty="hard", + required_tag="reliable", + ) + environment = build_environment_artifact( + "trip planning", + { + "goal": "plan a constrained itinerary", + "constraints": ["moderate budget", "reliable transport", "strong local evidence"], + }, + "goal: plan a constrained itinerary; constraints: moderate budget; reliable transport", + {"notes": "family-friendly museums and restaurants"}, + generated_schema(), + generated_records(), + row_number=0, + ) + return build_task_tuple(environment, task_config) + + class TestGeneralistAgentEnvColumnConfig: - def test_required_columns_include_category_and_context(self) -> None: - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - context_columns=["constraints", "persona", "constraints"], + def test_environment_config_required_columns_include_generated_schema_and_records(self) -> None: + config = GeneralistAgentEnvironmentColumnConfig( + name="agent_environment", + task_topic_column="topic", + task_constraints_column="constraints", + database_schema_column="schema", + database_records_column="records", + context_columns=["notes", "persona"], ) - assert config.required_columns == ["category", "constraints", "persona"] + assert config.required_columns == ["topic", "constraints", "schema", "records", "notes", "persona"] assert config.side_effect_columns == [] - def test_column_emoji(self) -> None: - config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") + def test_task_config_requires_environment_column(self) -> None: + config = 
GeneralistAgentTaskColumnConfig( + name="agent_task", + environment_column="agent_environment", + ) - assert config.get_column_emoji() == "🧰" + assert config.required_columns == ["agent_environment"] + assert config.side_effect_columns == [] - def test_rejects_empty_category_column(self) -> None: - with pytest.raises(ValidationError, match="task_category_column must not be empty"): - GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column=" ") + def test_rejects_repeated_input_columns(self) -> None: + with pytest.raises(ValidationError, match="must be distinct"): + GeneralistAgentEnvironmentColumnConfig( + name="agent_environment", + task_topic_column="topic", + task_constraints_column="constraints", + database_schema_column="schema", + database_records_column="records", + context_columns=["constraints"], + ) - def test_rejects_category_repeated_as_context(self) -> None: - with pytest.raises(ValidationError, match="context_columns must not repeat task_category_column"): - GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - context_columns=["category"], + def test_rejects_empty_topic_column(self) -> None: + with pytest.raises(ValidationError, match="task_topic_column must not be empty"): + GeneralistAgentEnvironmentColumnConfig( + name="agent_environment", + task_topic_column=" ", + database_schema_column="schema", + database_records_column="records", ) - def test_normalizes_required_tag(self) -> None: - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - required_tag=" Family ", + def test_normalizes_task_required_tag(self) -> None: + config = GeneralistAgentTaskColumnConfig( + name="agent_task", + environment_column="agent_environment", + required_tag=" Reliable ", ) - assert config.required_tag == "family" + assert config.required_tag == "reliable" class TestGeneralistAgentEnvHelpers: def test_tool_names_follow_difficulty(self) -> None: - assert 
selected_tool_names("simple") == ["list_records", "search_records", "get_record"] - assert selected_tool_names("medium") == ["list_records", "search_records", "get_record", "filter_records"] + assert selected_tool_names("simple") == ["describe_schema", "list_records", "search_records", "get_record"] + assert selected_tool_names("medium") == [ + "describe_schema", + "list_records", + "search_records", + "get_record", + "filter_records", + ] assert selected_tool_names("hard") == [ + "describe_schema", "list_records", "search_records", "get_record", @@ -86,18 +205,7 @@ def test_tool_names_follow_difficulty(self) -> None: ] def test_reference_answer_is_verifier_optimal(self) -> None: - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - difficulty="hard", - required_tag="family", - ) - environment_tuple = build_environment_tuple( - "planning a travel itinerary", - {"city": "Seoul", "budget": "1200"}, - config, - row_number=0, - ) + environment_tuple = build_valid_task_tuple() validation = verify_environment_tuple(environment_tuple) @@ -106,19 +214,28 @@ def test_reference_answer_is_verifier_optimal(self) -> None: assert validation.tools_passed is True assert validation.answer == environment_tuple["reference_answer"] assert environment_tuple["verifier"]["reference_solution_passed"] is True - assert environment_tuple["task"]["constraints"]["required_tag"] == "family" + assert environment_tuple["task"]["constraints"]["required_tag"] == "reliable" def test_constraints_are_repaired_when_user_values_are_unsat(self) -> None: - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", + task_config = GeneralistAgentTaskColumnConfig( + name="agent_task", + environment_column="agent_environment", required_tag="rare", max_cost=1, min_score=100, ) - environment_tuple = build_environment_tuple("debugging a build failure", {}, config, row_number=0) - database = 
environment_tuple["environment"]["database"] - constraints = default_constraints(database, config) + environment = build_environment_artifact( + "debugging a build failure", + {}, + "", + {}, + generated_schema(), + generated_records(), + row_number=0, + ) + task_tuple = build_task_tuple(environment, task_config) + database = task_tuple["environment"]["database"] + constraints = default_constraints(database, task_config) answer = build_reference_answer(database, constraints) assert constraints["repair_notes"] @@ -126,56 +243,58 @@ def test_constraints_are_repaired_when_user_values_are_unsat(self) -> None: class TestGeneralistAgentEnvColumnGenerator: - def test_generate_creates_environment_tuple(self) -> None: + def test_two_step_environment_then_task_generation(self) -> None: source_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], - "constraints": ["visit museums and stay under a moderate budget"], + "topic": ["trip planning"], + "constraints": [ + { + "goal": "build a three-day itinerary", + "constraints": ["hotels, restaurants, and attractions", "moderate budget"], + "success_criteria": ["reliable transport", "strong local evidence"], + } + ], + "schema": [generated_schema()], + "records": [{"records": generated_records()}], + "notes": ["family-friendly museums, moderate budget, reliable transport"], } ) - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - context_columns=["constraints"], + environment_config = GeneralistAgentEnvironmentColumnConfig( + name="agent_environment", + task_topic_column="topic", + task_constraints_column="constraints", + database_schema_column="schema", + database_records_column="records", + context_columns=["notes"], ) - generator = make_generator(config) - - result = generator.generate(source_df) - environment_tuple = result.loc[0, "agent_env"] - - assert environment_tuple["schema_version"] == "generalist-agent-env/v1" - assert 
environment_tuple["environment"]["sandbox"]["base_tools"] == ["bash", "search"] - assert environment_tuple["environment"]["database_record_count"] == config.database_size - assert {tool["name"] for tool in environment_tuple["tools"]} == set(selected_tool_names("hard")) - assert environment_tuple["task"]["difficulty"] == "hard" - assert [iteration["difficulty"] for iteration in environment_tuple["task_iterations"]] == [ - "simple", - "medium", - "hard", - ] - assert all(iteration["reference_solution_passed"] for iteration in environment_tuple["task_iterations"]) - assert environment_tuple["solution"]["restrictions"] == [ - "may call synthesized tool functions", - "may perform local logical computation", - "must not directly access the sandbox database", - ] + task_config = GeneralistAgentTaskColumnConfig( + name="agent_task", + environment_column="agent_environment", + difficulty="hard", + required_tag="reliable", + ) + environment_generator = make_environment_generator(environment_config) + task_generator = make_task_generator(task_config) + + with_environment = environment_generator.generate(source_df) + result = task_generator.generate(with_environment) + environment_artifact = result.loc[0, "agent_environment"] + task_tuple = result.loc[0, "agent_task"] + validation = verify_environment_tuple(task_tuple) + + assert environment_artifact["schema_version"] == "generalist-agent-environment/v1" + assert environment_artifact["environment"]["data_generation"]["mode"] == "generated_by_data_designer_columns" + assert environment_artifact["environment"]["database_schema"]["record_type"] == "trip_candidate" + assert environment_artifact["environment"]["database"][0]["record_id"] == "trip-001" + assert task_tuple["schema_version"] == "generalist-agent-task/v1" + assert task_tuple["task"]["constraints"]["required_tag"] == "reliable" + assert "describe_schema" in task_tuple["solution"]["source"] + assert validation.passed is True def 
test_generated_python_sources_pass_verifier(self) -> None: - source_df = pd.DataFrame( - { - "category": ["planning a travel itinerary"], - "constraints": ["compare candidate plans by score, cost, and family suitability"], - } - ) - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - context_columns=["constraints"], - ) - generator = make_generator(config) - result = generator.generate(source_df) + task_tuple = build_valid_task_tuple() - validation = verify_environment_tuple(result.loc[0, "agent_env"]) + validation = verify_environment_tuple(task_tuple) assert validation.passed is True assert validation.answer["record_id"] @@ -183,20 +302,17 @@ def test_generated_python_sources_pass_verifier(self) -> None: assert [check.difficulty for check in validation.iteration_checks] == ["simple", "medium", "hard"] def test_row_record_validation_reads_named_output_column(self) -> None: - source_df = pd.DataFrame({"category": ["planning a travel itinerary"]}) - config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") - generator = make_generator(config) - result = generator.generate(source_df) + task_tuple = build_valid_task_tuple() + row = pd.Series({"agent_task": task_tuple}) - validation = verify_row_record(result.loc[0], output_column="agent_env") + validation = verify_row_record(row, output_column="agent_task") assert validation.passed is True assert validation.verifier_passed is True def test_row_record_validation_reports_missing_tool_implementation(self) -> None: - config = GeneralistAgentEnvColumnConfig(name="agent_env", task_category_column="category") - environment_tuple = build_environment_tuple("planning a travel itinerary", {}, config, row_number=0) - broken_tuple = deepcopy(environment_tuple) + task_tuple = build_valid_task_tuple() + broken_tuple = deepcopy(task_tuple) broken_tuple["tool_module_source"] = broken_tuple["tool_module_source"].replace( "def rank_records(", "def 
missing_rank_records(", @@ -208,53 +324,70 @@ def test_row_record_validation_reports_missing_tool_implementation(self) -> None assert validation.tools_passed is False assert any("rank_records" in error for error in validation.errors) - def test_row_record_validation_accepts_parquet_restored_arrays(self, tmp_path: Path) -> None: + def test_rejects_generated_records_missing_required_fields(self) -> None: source_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], - "constraints": ["compare candidate plans by score, cost, and family suitability"], + "topic": ["trip planning"], + "schema": [generated_schema()], + "records": [{"records": [{"record_id": "bad"}]}], } ) - config = GeneralistAgentEnvColumnConfig( - name="agent_env", - task_category_column="category", - context_columns=["constraints"], + config = GeneralistAgentEnvironmentColumnConfig( + name="agent_environment", + task_topic_column="topic", + database_schema_column="schema", + database_records_column="records", ) - generator = make_generator(config) - result = generator.generate(source_df) - environment_tuple = result.loc[0, "agent_env"] + generator = make_environment_generator(config) + + with pytest.raises(ValueError, match="missing required fields"): + generator.generate(source_df) + + def test_row_record_validation_accepts_parquet_restored_arrays(self, tmp_path: Path) -> None: + task_tuple = build_valid_task_tuple() dataset_path = tmp_path / "dataset.parquet" - pd.DataFrame({"agent_env": [environment_tuple]}).to_parquet(dataset_path) + pd.DataFrame({"agent_task": [task_tuple]}).to_parquet(dataset_path) restored = pd.read_parquet(dataset_path) - validation = verify_row_record(restored.loc[0], output_column="agent_env") + validation = verify_row_record(restored.loc[0], output_column="agent_task") assert validation.passed is True - assert validation.answer == environment_tuple["reference_answer"] + assert validation.answer == task_tuple["reference_answer"] class 
TestGeneralistAgentEnvPreviewIntegration: def test_preview_generates_environment_tuple(self, tmp_path: Path) -> None: seed_df = pd.DataFrame( { - "category": ["planning a travel itinerary"], + "topic": ["planning a travel itinerary"], "constraints": ["compare candidate plans by score, cost, and family suitability"], + "schema": [generated_schema()], + "records": [{"records": generated_records()}], + "notes": ["family-friendly museums and restaurants"], } ) builder = DataDesignerConfigBuilder() builder.with_seed_dataset(DataFrameSeedSource(df=seed_df)) builder.add_column( - name="agent_env", - column_type="generalist-agent-env", - task_category_column="category", - context_columns=["constraints"], + name="agent_environment", + column_type="generalist-agent-environment", + task_topic_column="topic", + task_constraints_column="constraints", + database_schema_column="schema", + database_records_column="records", + context_columns=["notes"], + ) + builder.add_column( + name="agent_task", + column_type="generalist-agent-task", + environment_column="agent_environment", required_tag="family", ) result = DataDesigner(artifact_path=tmp_path / "artifacts").preview(builder, num_records=1) assert result.dataset is not None - environment_tuple = result.dataset.loc[0, "agent_env"] + environment_tuple = result.dataset.loc[0, "agent_task"] assert environment_tuple["task"]["constraints"]["required_tag"] == "family" assert environment_tuple["verifier"]["reference_solution_passed"] is True