diff --git a/environments/prime_tau2_synth/README.md b/environments/prime_tau2_synth/README.md new file mode 100644 index 000000000..c3215a45d --- /dev/null +++ b/environments/prime_tau2_synth/README.md @@ -0,0 +1,47 @@ +# Description + +This environment trains on τ²-bench-style tasks with custom synthetic data, using the [tau2-synth](https://app.primeintellect.ai/dashboard/environments/prime/tau2-synth) environment from the Prime Intellect Environments Hub through the verifiers integration. See tau2-synth for the full description and configuration details. + +Domains: `library`, `fitness_gym`, `tech_support`, `telecom`, `cloud_incident_response`, `daily_planner`, `ev_charging_support`. + +For additional details on the prime verifiers integration in NeMo Gym, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md). + +## Install Gym + +``` +git clone https://github.com/NVIDIA-NeMo/Gym +cd Gym +uv venv; source .venv/bin/activate; uv sync +``` + +## Test tau2-synth example + +First, configure `env.yaml`; for example, for a vLLM-served model: +``` +policy_base_url: "http://localhost:8000/v1" +policy_api_key: EMPTY +policy_model_name: "Qwen/Qwen3-4B-Instruct-2507" +``` + +``` +# start nemo gym servers +ng_run "+config_paths=[environments/prime_tau2_synth/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" + +# generate a rollout +ng_collect_rollouts \ + +agent_name=prime_tau2_synth_agent \ + +input_jsonl_fpath=environments/prime_tau2_synth/data/example.jsonl \ + +output_jsonl_fpath=environments/prime_tau2_synth/data/example-rollouts.jsonl \ + +limit=1 + +# view the rollout +tail -n 1 environments/prime_tau2_synth/data/example-rollouts.jsonl | jq | less +``` + +# Licensing information +Code: Apache 2.0 +Data: N/A + +Dependencies +- nemo_gym: Apache 2.0 +- verifiers: Apache 2.0 diff --git a/environments/prime_tau2_synth/__init__.py b/environments/prime_tau2_synth/__init__.py new file mode 100644 index 000000000..e69de29bb diff 
--git a/environments/prime_tau2_synth/config.yaml b/environments/prime_tau2_synth/config.yaml new file mode 100644 index 000000000..290fcc247 --- /dev/null +++ b/environments/prime_tau2_synth/config.yaml @@ -0,0 +1,27 @@ +prime_tau2_synth_agent: + responses_api_agents: + prime_tau2_synth_agent: + entrypoint: app.py + domain: agent + description: tau2-bench style training with custom synthetic domains via Prime Intellect verifiers. + value: Improve multi-domain agentic customer service and tool use capabilities. + model_server: + type: responses_api_models + name: policy_model + model_name: "" + vf_env_id: tau2-synth + vf_env_args: + domain: library + user_model: gpt-4.1 + max_steps: 200 + max_errors: 10 + group_size: 1 + max_concurrent_generation: -1 + max_concurrent_scoring: -1 + max_tokens: 16000 + temperature: 1.0 + top_p: 1.0 + datasets: + - name: example + type: example + jsonl_fpath: environments/prime_tau2_synth/data/example.jsonl diff --git a/environments/prime_tau2_synth/data/.gitignore b/environments/prime_tau2_synth/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/prime_tau2_synth/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/prime_tau2_synth/data/example.jsonl b/environments/prime_tau2_synth/data/example.jsonl new file mode 100644 index 000000000..3f5665fea --- /dev/null +++ b/environments/prime_tau2_synth/data/example.jsonl @@ -0,0 +1 @@ +{"task_idx": 0, "vf_env_id": "tau2-synth", "responses_create_params": {"input": []}} diff --git a/environments/prime_tau2_synth/prepare.py b/environments/prime_tau2_synth/prepare.py new file mode 100644 index 000000000..0c6d86f1b --- /dev/null +++ b/environments/prime_tau2_synth/prepare.py @@ -0,0 +1,82 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Generate a Gym JSONL for the tau2-synth verifiers environment.
+
+Loads the tau2-synth env for a given domain and writes one row per task.
+
+Requires the agent venv (or a venv with verifiers + tau2-synth installed):
+    pip install 'verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@v0.1.14' \\
+        --extra-index-url https://hub.primeintellect.ai/prime/simple/
+    pip install 'tau2 @ git+https://github.com/eligotts/tau2-bench.git@4839dd6' tau2-synth==0.2.0
+
+Usage:
+    python environments/prime_tau2_synth/prepare.py --domain library --output environments/prime_tau2_synth/data/library_train.jsonl
+"""
+
+import argparse
+import json
+from pathlib import Path
+
+
+VF_ENV_ID = "tau2-synth"
+DOMAINS = [
+    "library",
+    "fitness_gym",
+    "tech_support",
+    "telecom",
+    "cloud_incident_response",
+    "daily_planner",
+    "ev_charging_support",
+]
+
+
+def main() -> None:
+    """Export the tasks of the CLI-selected domain/split as Gym JSONL rows, one JSON object per line."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--domain", default="library", choices=DOMAINS)
+    parser.add_argument("--split", default="train", help="verifiers env split (typically 'train' or 'test')")
+    parser.add_argument("--limit", type=int, default=None)
+    parser.add_argument("--output", required=True)
+    args = parser.parse_args()
+
+    try:
+        import verifiers as vf
+    except ImportError as exc:
+        raise ImportError("Install verifiers + tau2-synth first; see module docstring.") from exc
+
+    env = vf.load_environment(VF_ENV_ID, domain=args.domain)
+    # NOTE(review): confirm the pinned verifiers `Environment.get_dataset` accepts a split name positionally.
+    dataset = env.get_dataset(args.split)
+    rows = list(dataset)[: args.limit]  # a limit of None keeps every row
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with output_path.open("w", encoding="utf-8") as f:
+        for idx, row in enumerate(rows):
+            prompt = row.get("question", row.get("prompt", ""))
+            # Generated keys are authoritative: a dataset column must not clobber task_idx/vf_env_id/domain.
+            generated = {
+                "task_idx": idx,
+                "vf_env_id": VF_ENV_ID,
+                "domain": args.domain,
+                "responses_create_params": {"input": [{"role": "user", "content": prompt}]},
+            }
+            out = {**generated, **{k: v for k, v in row.items() if k not in generated}}
+            f.write(json.dumps(out) + "\n")
+
+    print(f"Wrote {len(rows)} rows to {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/responses_api_agents/prime_tau2_synth_agent/__init__.py b/responses_api_agents/prime_tau2_synth_agent/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/responses_api_agents/prime_tau2_synth_agent/app.py b/responses_api_agents/prime_tau2_synth_agent/app.py
new file mode 100644
index 000000000..b6da9c79b
--- /dev/null
+++ b/responses_api_agents/prime_tau2_synth_agent/app.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from responses_api_agents.verifiers_agent.app import VerifiersAgent + + +if __name__ == "__main__": + VerifiersAgent.run_webserver() diff --git a/responses_api_agents/prime_tau2_synth_agent/requirements.txt b/responses_api_agents/prime_tau2_synth_agent/requirements.txt new file mode 100644 index 000000000..cdf844b1e --- /dev/null +++ b/responses_api_agents/prime_tau2_synth_agent/requirements.txt @@ -0,0 +1,5 @@ +-e nemo-gym[dev] @ ../../ +verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@v0.1.14 +--extra-index-url https://hub.primeintellect.ai/prime/simple/ +tau2 @ git+https://github.com/eligotts/tau2-bench.git@4839dd6 +tau2-synth==0.2.0 diff --git a/responses_api_agents/prime_tau2_synth_agent/tests/__init__.py b/responses_api_agents/prime_tau2_synth_agent/tests/__init__.py new file mode 100644 index 000000000..e69de29bb