From 73aae93e7e545d5c3d594e53c5bbb367462973a5 Mon Sep 17 00:00:00 2001
From: Christian Munley <cmunley@nvidia.com>
Date: Fri, 29 May 2026 21:59:19 -0700
Subject: [PATCH 1/2] init

Signed-off-by: Christian Munley <cmunley@nvidia.com>
---
 environments/prime_tau2_synth/README.md       | 53 ++++++++++++
 environments/prime_tau2_synth/__init__.py     |  0
 environments/prime_tau2_synth/config.yaml     | 27 ++++++
 environments/prime_tau2_synth/data/.gitignore |  5 ++
 .../prime_tau2_synth/data/example.jsonl       |  1 +
 environments/prime_tau2_synth/prepare.py      | 82 +++++++++++++++++++
 .../prime_tau2_synth_agent/__init__.py        |  0
 .../prime_tau2_synth_agent/app.py             | 18 ++++
 .../prime_tau2_synth_agent/requirements.txt   |  5 ++
 .../prime_tau2_synth_agent/tests/__init__.py  |  0
 10 files changed, 191 insertions(+)
 create mode 100644 environments/prime_tau2_synth/README.md
 create mode 100644 environments/prime_tau2_synth/__init__.py
 create mode 100644 environments/prime_tau2_synth/config.yaml
 create mode 100644 environments/prime_tau2_synth/data/.gitignore
 create mode 100644 environments/prime_tau2_synth/data/example.jsonl
 create mode 100644 environments/prime_tau2_synth/prepare.py
 create mode 100644 responses_api_agents/prime_tau2_synth_agent/__init__.py
 create mode 100644 responses_api_agents/prime_tau2_synth_agent/app.py
 create mode 100644 responses_api_agents/prime_tau2_synth_agent/requirements.txt
 create mode 100644 responses_api_agents/prime_tau2_synth_agent/tests/__init__.py

diff --git a/environments/prime_tau2_synth/README.md b/environments/prime_tau2_synth/README.md
new file mode 100644
index 000000000..9b9d9cb67
--- /dev/null
+++ b/environments/prime_tau2_synth/README.md
@@ -0,0 +1,53 @@
+# Description
+
+This environment runs τ²-bench with custom synthetic domains via Prime Intellect [verifiers](https://github.com/PrimeIntellect-ai/verifiers) environments. See [tau2-synth on Prime Intellect](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) for the full description.
+
+Domains: `library`, `fitness_gym`, `tech_support`, `telecom`, `cloud_incident_response`, `daily_planner`, `ev_charging_support`.
+
+For installing additional prime environments from the Environments Hub and generating datasets, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md).
+
+## Install Gym
+
+```
+git clone https://github.com/NVIDIA-NeMo/Gym
+cd Gym
+uv venv; source .venv/bin/activate; uv sync
+```
+
+## Test tau2-synth example
+
+First, configure `env.yaml`; for example, for a model served with vLLM:
+```
+policy_base_url: "http://localhost:8000/v1"
+policy_api_key: EMPTY
+policy_model_name: "Qwen/Qwen3-4B-Instruct-2507"
+```
+
+```
+# start nemo gym servers
+ng_run "+config_paths=[environments/prime_tau2_synth/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+
+# generate a rollout
+ng_collect_rollouts \
+    +agent_name=prime_tau2_synth_agent \
+    +input_jsonl_fpath=environments/prime_tau2_synth/data/example.jsonl \
+    +output_jsonl_fpath=environments/prime_tau2_synth/data/example-rollouts.jsonl \
+    +limit=1
+
+# view the rollout
+tail -n 1 environments/prime_tau2_synth/data/example-rollouts.jsonl | jq | less
+```
+
+## Integration notes
+
+The patch to include prompt and generation token ids for preventing retokenization error when training with NeMo RL has been upstreamed into verifiers' `NeMoRLChatCompletionsClient`. We currently track verifiers `main` (`git+https://github.com/PrimeIntellect-ai/verifiers.git@main`) for this support; once verifiers `0.1.13` is released, the requirements pin will move to that tagged version.
+
+For installing new prime environments and generating datasets, use a separate venv (outside of Gym) to avoid dependency conflicts with the `exclude-dependencies` section of Gym `pyproject.toml` and the server's pinned verifiers version. After generating your dataset, deactivate the separate venv and return to the Gym venv for running servers. Make sure to restart NeMo Gym servers with `ng_run` after any environment changes to ensure the pinned version of verifiers is used.
+
+# Licensing information
+Code: Apache 2.0
+Data: N/A
+
+Dependencies
+- nemo_gym: Apache 2.0
+- verifiers: Apache 2.0
diff --git a/environments/prime_tau2_synth/__init__.py b/environments/prime_tau2_synth/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/prime_tau2_synth/config.yaml b/environments/prime_tau2_synth/config.yaml
new file mode 100644
index 000000000..aa273972c
--- /dev/null
+++ b/environments/prime_tau2_synth/config.yaml
@@ -0,0 +1,27 @@
+prime_tau2_synth_agent:
+  responses_api_agents:
+    prime_tau2_synth_agent:
+      entrypoint: app.py
+      domain: agent
+      description: tau2-bench with custom synthetic domains via the verifiers harness.
+      value: Improve multi-domain customer service and tool use capabilities.
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      model_name: ""
+      vf_env_id: tau2-synth
+      vf_env_args:
+        domain: library
+        user_model: gpt-4.1
+        max_steps: 200
+        max_errors: 10
+      group_size: 1
+      max_concurrent_generation: -1
+      max_concurrent_scoring: -1
+      max_tokens: 16000
+      temperature: 1.0
+      top_p: 1.0
+      datasets:
+        - name: example
+          type: example
+          jsonl_fpath: environments/prime_tau2_synth/data/example.jsonl
diff --git a/environments/prime_tau2_synth/data/.gitignore b/environments/prime_tau2_synth/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/prime_tau2_synth/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/prime_tau2_synth/data/example.jsonl b/environments/prime_tau2_synth/data/example.jsonl
new file mode 100644
index 000000000..3f5665fea
--- /dev/null
+++ b/environments/prime_tau2_synth/data/example.jsonl
@@ -0,0 +1 @@
+{"task_idx": 0, "vf_env_id": "tau2-synth", "responses_create_params": {"input": []}}
diff --git a/environments/prime_tau2_synth/prepare.py b/environments/prime_tau2_synth/prepare.py
new file mode 100644
index 000000000..0c6d86f1b
--- /dev/null
+++ b/environments/prime_tau2_synth/prepare.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generate a Gym JSONL for the tau2-synth verifiers environment.
+
+Loads the tau2-synth env for a given domain and writes one row per task.
+
+Requires the agent venv (or a venv with verifiers + tau2-synth installed):
+    pip install 'verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@v0.1.14' \\
+        --extra-index-url https://hub.primeintellect.ai/prime/simple/
+    pip install 'tau2 @ git+https://github.com/eligotts/tau2-bench.git@4839dd6' tau2-synth==0.2.0
+
+Usage:
+    python environments/prime_tau2_synth/prepare.py --domain library --output environments/prime_tau2_synth/data/library_train.jsonl
+"""
+
+import argparse
+import json
+from pathlib import Path
+
+
+VF_ENV_ID = "tau2-synth"  # environment id handed to vf.load_environment and stamped on every row
+DOMAINS = [  # values accepted by --domain
+    "library",
+    "fitness_gym",
+    "tech_support",
+    "telecom",
+    "cloud_incident_response",
+    "daily_planner",
+    "ev_charging_support",
+]
+
+
+def main() -> None:
+    """Write one Gym JSONL row per task of the chosen tau2-synth domain and split."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--domain", default="library", choices=DOMAINS)
+    parser.add_argument("--split", default="train", help="verifiers env split (typically 'train' or 'test')")
+    parser.add_argument("--limit", type=int, default=None)
+    parser.add_argument("--output", required=True)
+    args = parser.parse_args()
+
+    try:
+        import verifiers as vf
+    except ImportError as err:
+        raise ImportError("Install verifiers + tau2-synth first; see module docstring.") from err
+
+    env = vf.load_environment(VF_ENV_ID, domain=args.domain)
+    # verifiers' get_dataset()/get_eval_dataset() take a row count, not a split name, so the
+    # split picks the getter: "train" reads the training set, anything else the eval set.
+    load_split = env.get_dataset if args.split == "train" else env.get_eval_dataset
+    rows = list(load_split())
+    if args.limit is not None:
+        rows = rows[: args.limit]
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w", encoding="utf-8") as f:
+        for idx, row in enumerate(rows):
+            prompt = row.get("question", row.get("prompt", ""))
+            # A chat-style prompt is already a message list; only plain text needs wrapping.
+            messages = prompt if isinstance(prompt, list) else [{"role": "user", "content": prompt}]
+            # Dataset columns are spread first so they can never override the Gym keys below.
+            out = {**row, "task_idx": idx, "vf_env_id": VF_ENV_ID, "domain": args.domain}
+            out["responses_create_params"] = {"input": messages}
+            f.write(json.dumps(out) + "\n")
+
+    print(f"Wrote {len(rows)} rows to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/responses_api_agents/prime_tau2_synth_agent/__init__.py b/responses_api_agents/prime_tau2_synth_agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/responses_api_agents/prime_tau2_synth_agent/app.py b/responses_api_agents/prime_tau2_synth_agent/app.py
new file mode 100644
index 000000000..b6da9c79b
--- /dev/null
+++ b/responses_api_agents/prime_tau2_synth_agent/app.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from responses_api_agents.verifiers_agent.app import VerifiersAgent
+
+
+if __name__ == "__main__":
+    VerifiersAgent.run_webserver()  # no subclass here: the imported VerifiersAgent is served as-is
diff --git a/responses_api_agents/prime_tau2_synth_agent/requirements.txt b/responses_api_agents/prime_tau2_synth_agent/requirements.txt
new file mode 100644
index 000000000..cdf844b1e
--- /dev/null
+++ b/responses_api_agents/prime_tau2_synth_agent/requirements.txt
@@ -0,0 +1,5 @@
+-e nemo-gym[dev] @ ../../
+verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@v0.1.14
+--extra-index-url https://hub.primeintellect.ai/prime/simple/
+tau2 @ git+https://github.com/eligotts/tau2-bench.git@4839dd6
+tau2-synth==0.2.0
diff --git a/responses_api_agents/prime_tau2_synth_agent/tests/__init__.py b/responses_api_agents/prime_tau2_synth_agent/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb

From f82bcc379462558cbe263571a19a533384f2807b Mon Sep 17 00:00:00 2001
From: Christian Munley <cmunley@nvidia.com>
Date: Fri, 29 May 2026 22:03:37 -0700
Subject: [PATCH 2/2] docs

Signed-off-by: Christian Munley <cmunley@nvidia.com>
---
 environments/prime_tau2_synth/README.md   | 10 ++--------
 environments/prime_tau2_synth/config.yaml |  4 ++--
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/environments/prime_tau2_synth/README.md b/environments/prime_tau2_synth/README.md
index 9b9d9cb67..c3215a45d 100644
--- a/environments/prime_tau2_synth/README.md
+++ b/environments/prime_tau2_synth/README.md
@@ -1,10 +1,10 @@
 # Description
 
-This environment runs τ²-bench with custom synthetic domains via Prime Intellect [verifiers](https://github.com/PrimeIntellect-ai/verifiers) environments. See [tau2-synth on Prime Intellect](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) for the full description.
+This environment is for training on τ²-bench-style tasks with custom synthetic data via [tau2-synth](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) from the Prime Intellect Environments Hub, using the verifiers integration. See the tau2-synth page for the full description and configuration details.
 
 Domains: `library`, `fitness_gym`, `tech_support`, `telecom`, `cloud_incident_response`, `daily_planner`, `ev_charging_support`.
 
-For installing additional prime environments from the Environments Hub and generating datasets, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md).
+For more details on the Prime Intellect verifiers integration in NeMo Gym, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md).
 
 ## Install Gym
 
@@ -38,12 +38,6 @@ ng_collect_rollouts \
 tail -n 1 environments/prime_tau2_synth/data/example-rollouts.jsonl | jq | less
 ```
 
-## Integration notes
-
-The patch to include prompt and generation token ids for preventing retokenization error when training with NeMo RL has been upstreamed into verifiers' `NeMoRLChatCompletionsClient`. We currently track verifiers `main` (`git+https://github.com/PrimeIntellect-ai/verifiers.git@main`) for this support; once verifiers `0.1.13` is released, the requirements pin will move to that tagged version.
-
-For installing new prime environments and generating datasets, use a separate venv (outside of Gym) to avoid dependency conflicts with the `exclude-dependencies` section of Gym `pyproject.toml` and the server's pinned verifiers version. After generating your dataset, deactivate the separate venv and return to the Gym venv for running servers. Make sure to restart NeMo Gym servers with `ng_run` after any environment changes to ensure the pinned version of verifiers is used.
-
 # Licensing information
 Code: Apache 2.0
 Data: N/A
diff --git a/environments/prime_tau2_synth/config.yaml b/environments/prime_tau2_synth/config.yaml
index aa273972c..290fcc247 100644
--- a/environments/prime_tau2_synth/config.yaml
+++ b/environments/prime_tau2_synth/config.yaml
@@ -3,8 +3,8 @@ prime_tau2_synth_agent:
     prime_tau2_synth_agent:
       entrypoint: app.py
       domain: agent
-      description: tau2-bench with custom synthetic domains via the verifiers harness.
-      value: Improve multi-domain customer service and tool use capabilities.
+      description: tau2-bench style training with custom synthetic domains via Prime Intellect verifiers.
+      value: Improve multi-domain agentic customer service and tool use capabilities.
       model_server:
         type: responses_api_models
         name: policy_model