From 73aae93e7e545d5c3d594e53c5bbb367462973a5 Mon Sep 17 00:00:00 2001 From: Christian Munley Date: Fri, 29 May 2026 21:59:19 -0700 Subject: [PATCH 1/2] init Signed-off-by: Christian Munley --- environments/prime_tau2_synth/README.md | 53 ++++++++++++ environments/prime_tau2_synth/__init__.py | 0 environments/prime_tau2_synth/config.yaml | 27 ++++++ environments/prime_tau2_synth/data/.gitignore | 5 ++ .../prime_tau2_synth/data/example.jsonl | 1 + environments/prime_tau2_synth/prepare.py | 82 +++++++++++++++++++ .../prime_tau2_synth_agent/__init__.py | 0 .../prime_tau2_synth_agent/app.py | 18 ++++ .../prime_tau2_synth_agent/requirements.txt | 5 ++ .../prime_tau2_synth_agent/tests/__init__.py | 0 10 files changed, 191 insertions(+) create mode 100644 environments/prime_tau2_synth/README.md create mode 100644 environments/prime_tau2_synth/__init__.py create mode 100644 environments/prime_tau2_synth/config.yaml create mode 100644 environments/prime_tau2_synth/data/.gitignore create mode 100644 environments/prime_tau2_synth/data/example.jsonl create mode 100644 environments/prime_tau2_synth/prepare.py create mode 100644 responses_api_agents/prime_tau2_synth_agent/__init__.py create mode 100644 responses_api_agents/prime_tau2_synth_agent/app.py create mode 100644 responses_api_agents/prime_tau2_synth_agent/requirements.txt create mode 100644 responses_api_agents/prime_tau2_synth_agent/tests/__init__.py diff --git a/environments/prime_tau2_synth/README.md b/environments/prime_tau2_synth/README.md new file mode 100644 index 000000000..9b9d9cb67 --- /dev/null +++ b/environments/prime_tau2_synth/README.md @@ -0,0 +1,53 @@ +# Description + +This environment runs τ²-bench with custom synthetic domains via Prime Intellect [verifiers](https://github.com/PrimeIntellect-ai/verifiers) environments. See [tau2-synth on Prime Intellect](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) for the full description. 
+ +Domains: `library`, `fitness_gym`, `tech_support`, `telecom`, `cloud_incident_response`, `daily_planner`, `ev_charging_support`. + +For installing additional prime environments from the Environments Hub and generating datasets, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md). + +## Install Gym + +``` +git clone https://github.com/NVIDIA-NeMo/Gym +cd Gym +uv venv; source .venv/bin/activate; uv sync +``` + +## Test tau2-synth example + +First set `env.yaml`, for example for a vLLM served model: +``` +policy_base_url: "http://localhost:8000/v1" +policy_api_key: EMPTY +policy_model_name: "Qwen/Qwen3-4B-Instruct-2507" +``` + +``` +# start nemo gym servers +ng_run "+config_paths=[environments/prime_tau2_synth/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" + +# generate a rollout +ng_collect_rollouts \ + +agent_name=prime_tau2_synth_agent \ + +input_jsonl_fpath=environments/prime_tau2_synth/data/example.jsonl \ + +output_jsonl_fpath=environments/prime_tau2_synth/data/example-rollouts.jsonl \ + +limit=1 + +# view the rollout +tail -n 1 environments/prime_tau2_synth/data/example-rollouts.jsonl | jq | less +``` + +## Integration notes + +The patch to include prompt and generation token ids for preventing retokenization error when training with NeMo RL has been upstreamed into verifiers' `NeMoRLChatCompletionsClient`. We currently track verifiers `main` (`git+https://github.com/PrimeIntellect-ai/verifiers.git@main`) for this support; once verifiers `0.1.13` is released, the requirements pin will move to that tagged version. + +For installing new prime environments and generating datasets, use a separate venv (outside of Gym) to avoid dependency conflicts with the `exclude-dependencies` section of Gym `pyproject.toml` and the server's pinned verifiers version. After generating your dataset, deactivate the separate venv and return to the Gym venv for running servers. 
Make sure to restart NeMo Gym servers with `ng_run` after any environment changes to ensure the pinned version of verifiers is used. + +# Licensing information +Code: Apache 2.0 +Data: N/A + +Dependencies +- nemo_gym: Apache 2.0 +- verifiers: Apache 2.0 diff --git a/environments/prime_tau2_synth/__init__.py b/environments/prime_tau2_synth/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/prime_tau2_synth/config.yaml b/environments/prime_tau2_synth/config.yaml new file mode 100644 index 000000000..aa273972c --- /dev/null +++ b/environments/prime_tau2_synth/config.yaml @@ -0,0 +1,27 @@ +prime_tau2_synth_agent: + responses_api_agents: + prime_tau2_synth_agent: + entrypoint: app.py + domain: agent + description: tau2-bench with custom synthetic domains via the verifiers harness. + value: Improve multi-domain customer service and tool use capabilities. + model_server: + type: responses_api_models + name: policy_model + model_name: "" + vf_env_id: tau2-synth + vf_env_args: + domain: library + user_model: gpt-4.1 + max_steps: 200 + max_errors: 10 + group_size: 1 + max_concurrent_generation: -1 + max_concurrent_scoring: -1 + max_tokens: 16000 + temperature: 1.0 + top_p: 1.0 + datasets: + - name: example + type: example + jsonl_fpath: environments/prime_tau2_synth/data/example.jsonl diff --git a/environments/prime_tau2_synth/data/.gitignore b/environments/prime_tau2_synth/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/prime_tau2_synth/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/prime_tau2_synth/data/example.jsonl b/environments/prime_tau2_synth/data/example.jsonl new file mode 100644 index 000000000..3f5665fea --- /dev/null +++ b/environments/prime_tau2_synth/data/example.jsonl @@ -0,0 +1 @@ +{"task_idx": 0, "vf_env_id": "tau2-synth", "responses_create_params": {"input": 
# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate a Gym JSONL for the tau2-synth verifiers environment.

Loads the tau2-synth env for a given domain and writes one row per task.

Requires the agent venv (or a venv with verifiers + tau2-synth installed):
    pip install 'verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@v0.1.14' \\
        --extra-index-url https://hub.primeintellect.ai/prime/simple/
    pip install 'tau2 @ git+https://github.com/eligotts/tau2-bench.git@4839dd6' tau2-synth==0.2.0

Usage:
    python environments/prime_tau2_synth/prepare.py --domain library --output environments/prime_tau2_synth/data/library_train.jsonl
"""

import argparse
import json
from itertools import islice
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence


VF_ENV_ID = "tau2-synth"
DOMAINS = [
    "library",
    "fitness_gym",
    "tech_support",
    "telecom",
    "cloud_incident_response",
    "daily_planner",
    "ev_charging_support",
]
# --split values that are served from the environment's evaluation dataset.
_EVAL_SPLITS = ("test", "eval", "validation")


def _non_negative_int(value: str) -> int:
    """argparse ``type=`` callable: parse *value* as an integer that is >= 0."""
    number = int(value)
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {number}")
    return number


def _build_input(row: Dict[str, Any]) -> List[Any]:
    """Return the ``responses_create_params.input`` list for one dataset row.

    ``question`` is preferred over ``prompt``; a missing value becomes ``""``.
    """
    prompt = row.get("question", row.get("prompt", ""))
    if isinstance(prompt, list):
        # NOTE(review): assumes a list-valued prompt is already a list of chat
        # messages ({"role": ..., "content": ...}) -- confirm against the
        # tau2-synth dataset. Wrapping it inside a single user message would
        # nest messages inside `content`.
        return list(prompt)
    return [{"role": "user", "content": prompt}]


def _build_row(idx: int, row: Dict[str, Any], domain: str) -> Dict[str, Any]:
    """Build one output JSONL record for dataset row *row* at position *idx*.

    The generated fields (``task_idx``, ``vf_env_id``, ``domain``,
    ``responses_create_params``) always win; remaining dataset columns are
    appended after them unchanged.
    """
    out: Dict[str, Any] = {
        "task_idx": idx,
        "vf_env_id": VF_ENV_ID,
        "domain": domain,
        "responses_create_params": {"input": _build_input(row)},
    }
    # Only copy columns that do not collide with a generated field, so a
    # dataset column called e.g. "task_idx" cannot overwrite the row index.
    extras = {key: value for key, value in row.items() if key not in out}
    out.update(extras)
    return out


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Write one JSONL row per task of the selected tau2-synth domain.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        ImportError: If ``verifiers`` cannot be imported; the original error
            is attached as ``__cause__``.
        SystemExit: Raised by argparse on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Generate a Gym JSONL for the tau2-synth verifiers environment.")
    parser.add_argument("--domain", default="library", choices=DOMAINS)
    parser.add_argument(
        "--split",
        default="train",
        choices=("train", *_EVAL_SPLITS),
        help="'train' uses the env's training dataset; 'test'/'eval'/'validation' use its eval dataset",
    )
    parser.add_argument("--limit", type=_non_negative_int, default=None)
    parser.add_argument("--output", required=True)
    args = parser.parse_args(argv)

    try:
        import verifiers as vf
    except ImportError as err:
        raise ImportError("Install verifiers + tau2-synth first; see module docstring.") from err

    env = vf.load_environment(VF_ENV_ID, domain=args.domain)
    # NOTE(review): assumes verifiers' Environment.get_dataset() /
    # get_eval_dataset() take (n, seed) rather than a split name, so the split
    # is chosen by method instead of being passed positionally -- confirm
    # against the pinned verifiers version.
    dataset = env.get_dataset() if args.split == "train" else env.get_eval_dataset()
    # islice(..., None) yields everything; a bound stops without copying the rest.
    rows = list(islice(dataset, args.limit))

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as f:
        for idx, row in enumerate(rows):
            f.write(json.dumps(_build_row(idx, row, args.domain)) + "\n")

    print(f"Wrote {len(rows)} rows to {output_path}")


if __name__ == "__main__":
    main()
+# See the License for the specific language governing permissions and +# limitations under the License. +from responses_api_agents.verifiers_agent.app import VerifiersAgent + + +if __name__ == "__main__": + VerifiersAgent.run_webserver() diff --git a/responses_api_agents/prime_tau2_synth_agent/requirements.txt b/responses_api_agents/prime_tau2_synth_agent/requirements.txt new file mode 100644 index 000000000..cdf844b1e --- /dev/null +++ b/responses_api_agents/prime_tau2_synth_agent/requirements.txt @@ -0,0 +1,5 @@ +-e nemo-gym[dev] @ ../../ +verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@v0.1.14 +--extra-index-url https://hub.primeintellect.ai/prime/simple/ +tau2 @ git+https://github.com/eligotts/tau2-bench.git@4839dd6 +tau2-synth==0.2.0 diff --git a/responses_api_agents/prime_tau2_synth_agent/tests/__init__.py b/responses_api_agents/prime_tau2_synth_agent/tests/__init__.py new file mode 100644 index 000000000..e69de29bb From f82bcc379462558cbe263571a19a533384f2807b Mon Sep 17 00:00:00 2001 From: Christian Munley Date: Fri, 29 May 2026 22:03:37 -0700 Subject: [PATCH 2/2] docs Signed-off-by: Christian Munley --- environments/prime_tau2_synth/README.md | 10 ++-------- environments/prime_tau2_synth/config.yaml | 4 ++-- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/environments/prime_tau2_synth/README.md b/environments/prime_tau2_synth/README.md index 9b9d9cb67..c3215a45d 100644 --- a/environments/prime_tau2_synth/README.md +++ b/environments/prime_tau2_synth/README.md @@ -1,10 +1,10 @@ # Description -This environment runs τ²-bench with custom synthetic domains via Prime Intellect [verifiers](https://github.com/PrimeIntellect-ai/verifiers) environments. See [tau2-synth on Prime Intellect](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) for the full description. 
+This environment is for training on τ²-bench-style tasks with custom synthetic data via [tau2-synth](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) from the Prime Intellect Environments Hub, through the verifiers integration. See the tau2-synth page for the full description and configuration details. Domains: `library`, `fitness_gym`, `tech_support`, `telecom`, `cloud_incident_response`, `daily_planner`, `ev_charging_support`. -For installing additional prime environments from the Environments Hub and generating datasets, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md). +For additional details on the prime verifiers integration in NeMo Gym, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md). ## Install Gym @@ -38,12 +38,6 @@ ng_collect_rollouts \ tail -n 1 environments/prime_tau2_synth/data/example-rollouts.jsonl | jq | less ``` -## Integration notes - -The patch to include prompt and generation token ids for preventing retokenization error when training with NeMo RL has been upstreamed into verifiers' `NeMoRLChatCompletionsClient`. We currently track verifiers `main` (`git+https://github.com/PrimeIntellect-ai/verifiers.git@main`) for this support; once verifiers `0.1.13` is released, the requirements pin will move to that tagged version. - -For installing new prime environments and generating datasets, use a separate venv (outside of Gym) to avoid dependency conflicts with the `exclude-dependencies` section of Gym `pyproject.toml` and the server's pinned verifiers version. After generating your dataset, deactivate the separate venv and return to the Gym venv for running servers. Make sure to restart NeMo Gym servers with `ng_run` after any environment changes to ensure the pinned version of verifiers is used. 
- # Licensing information Code: Apache 2.0 Data: N/A diff --git a/environments/prime_tau2_synth/config.yaml b/environments/prime_tau2_synth/config.yaml index aa273972c..290fcc247 100644 --- a/environments/prime_tau2_synth/config.yaml +++ b/environments/prime_tau2_synth/config.yaml @@ -3,8 +3,8 @@ prime_tau2_synth_agent: prime_tau2_synth_agent: entrypoint: app.py domain: agent - description: tau2-bench with custom synthetic domains via the verifiers harness. - value: Improve multi-domain customer service and tool use capabilities. + description: tau2-bench style training with custom synthetic domains via Prime Intellect verifiers. + value: Improve multi-domain agentic customer service and tool use capabilities. model_server: type: responses_api_models name: policy_model