diff --git a/environments/prime_general_agent/README.md b/environments/prime_general_agent/README.md new file mode 100644 index 000000000..3e86bb804 --- /dev/null +++ b/environments/prime_general_agent/README.md @@ -0,0 +1,45 @@ +# Description + +Self-growing toolbench environment that currently contains 4,418 synthetic tool-use tasks across 1,037 families and 5 difficulty tiers, and can be used to generate more. + +Prime Intellect environment integrated through the verifiers library. + +Full description: https://app.primeintellect.ai/dashboard/environments/primeintellect/general-agent + +For installing additional prime environments from the Environments Hub and generating datasets, see [responses_api_agents/verifiers_agent/README.md](../../responses_api_agents/verifiers_agent/README.md). + +## Notes + +1. Use the local solver to start, not the default env id. Set `vf_env_id: general-agent-solver-local` in `config.yaml`. The default `general-agent` env id may route to a sandbox solver (including options with OpenCode or RLM) that requires a funded `PRIME_API_KEY`. The local solver runs in-process with no sandbox. +2. Dataset `info` must be at the row top level. The local solver reads `state["info"]["task_dir"]`. If you have a dataset where `info` is nested under `verifier_metadata`, lift it. `prepare.py` writes rows in the correct shape. +3. `agent_ref.name` must match the agent directory name (`prime_general_agent`). NeMo Gym dispatches rows to the agent by this name. +4. Corpus version must match the installed package. `info.task_dir` paths point into the installed `general-agent` package's bundled tasks/ tree. Regenerate the dataset whenever you bump the `general-agent` pin. +5. `requirements.txt` currently installs verifiers from the `main` branch because the latest released version (0.1.14) is too old; this should be replaced with a fixed release pin soon. The environment is currently tested with verifiers>=0.1.15.dev2. 
+ +## Quick start + +```bash +ng_run "+config_paths=[environments/prime_general_agent/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=prime_general_agent \ + +input_jsonl_fpath=environments/prime_general_agent/data/example.jsonl \ + +output_jsonl_fpath=results/prime_general_agent_rollouts.jsonl \ + +limit=1 +``` + +## Prepare training data + +```bash +python environments/prime_general_agent/prepare.py --split train --output environments/prime_general_agent/data/train.jsonl +``` + +# Licensing information +Code: Apache 2.0 +Data: N/A + +Dependencies +- nemo_gym: Apache 2.0 +- verifiers: Apache 2.0 diff --git a/environments/prime_general_agent/__init__.py b/environments/prime_general_agent/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/prime_general_agent/config.yaml b/environments/prime_general_agent/config.yaml new file mode 100644 index 000000000..1137caa05 --- /dev/null +++ b/environments/prime_general_agent/config.yaml @@ -0,0 +1,23 @@ +prime_general_agent: + responses_api_agents: + prime_general_agent: + entrypoint: app.py + domain: agent + description: Self-growing toolbench environment with synthetic tool-use tasks from Prime Intellect. + value: Improve multi-turn tool use across diverse task families. 
+ model_server: + type: responses_api_models + name: policy_model + model_name: "" + vf_env_id: general-agent-solver-local + vf_env_args: {} + group_size: 1 + max_concurrent_generation: -1 + max_concurrent_scoring: -1 + max_tokens: 16000 + temperature: 1.0 + top_p: 1.0 + datasets: + - name: example + type: example + jsonl_fpath: environments/prime_general_agent/data/example.jsonl diff --git a/environments/prime_general_agent/data/.gitignore b/environments/prime_general_agent/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/prime_general_agent/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/prime_general_agent/data/example.jsonl b/environments/prime_general_agent/data/example.jsonl new file mode 100644 index 000000000..0aa57e560 --- /dev/null +++ b/environments/prime_general_agent/data/example.jsonl @@ -0,0 +1 @@ +{"task_idx": 0, "vf_env_id": "general-agent-solver-local", "agent_ref": {"type": "responses_api_agents", "name": "prime_general_agent"}, "responses_create_params": {"input": []}, "info": {}} diff --git a/environments/prime_general_agent/prepare.py b/environments/prime_general_agent/prepare.py new file mode 100644 index 000000000..6babaabff --- /dev/null +++ b/environments/prime_general_agent/prepare.py @@ -0,0 +1,90 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate a Gym JSONL for the prime general-agent verifiers environment.

The local solver does ``task_dir = Path(state["info"]["task_dir"])`` so the
``info`` dict must live at the top level of each row (not inside
``verifier_metadata``). This script writes rows in that lifted shape.

The ``info.task_dir`` paths point into the installed ``general-agent`` package's
bundled tasks/ tree, so the package version must match the corpus version that
produced the dataset.

Requires the agent venv (or a venv with verifiers + general-agent installed):
    pip install 'verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@main' \\
        --extra-index-url https://hub.primeintellect.ai/primeintellect/simple/
    pip install general-agent==0.1.4

Usage:
    python environments/prime_general_agent/prepare.py --output environments/prime_general_agent/data/train.jsonl
"""

import argparse
import json
from pathlib import Path


VF_ENV_ID = "general-agent-solver-local"
AGENT_NAME = "prime_general_agent"


def _load_info(row: dict) -> dict:
    """Return the row's ``info`` as a dict, decoding it when stored as a JSON string."""
    info = row.get("info")
    if isinstance(info, str):
        info = json.loads(info)
    return info or {}


def _build_prompt_input(row: dict) -> list:
    """Build ``responses_create_params.input`` from ``prompt`` (preferred) or ``question``."""
    for key in ("prompt", "question"):
        value = row.get(key)
        if not value:
            continue
        # A value that is already a list of role-tagged messages is passed
        # through as-is: wrapping it as the ``content`` of one user message
        # would produce a malformed input item.
        if isinstance(value, list) and all(isinstance(m, dict) and "role" in m for m in value):
            return list(value)
        return [{"role": "user", "content": value}]
    return []


def _build_row(idx: int, row: dict) -> dict:
    """Convert one dataset row into the Gym JSONL shape with ``info`` at the top level."""
    out = {
        "task_idx": idx,
        "vf_env_id": VF_ENV_ID,
        "agent_ref": {"type": "responses_api_agents", "name": AGENT_NAME},
        "responses_create_params": {"input": _build_prompt_input(row)},
        "info": _load_info(row),
    }
    # Carry the remaining dataset columns along, but never let a column that
    # happens to share a name override the routing keys set above.
    for key, value in row.items():
        out.setdefault(key, value)
    return out


def main() -> None:
    """Load one split of the verifiers environment and write it as Gym JSONL.

    Command-line arguments:
        --split: verifiers env split to export (default ``train``).
        --limit: optional cap on the number of rows written.
        --output: destination JSONL path (required); parent directories are created.

    Raises:
        ImportError: if the ``verifiers`` package is not installed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--split", default="train", help="verifiers env split (typically 'train' or 'test')")
    parser.add_argument("--limit", type=int, default=None)
    parser.add_argument("--output", required=True)
    args = parser.parse_args()

    # Imported lazily so that ``--help`` works without the agent venv.
    try:
        import verifiers as vf
    except ImportError as err:
        raise ImportError("Install verifiers + general-agent first; see module docstring.") from err

    env = vf.load_environment(VF_ENV_ID)
    dataset = env.get_dataset(args.split)
    rows = list(dataset)
    if args.limit is not None:
        rows = rows[: args.limit]

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding keeps the output independent of the platform locale.
    with output_path.open("w", encoding="utf-8") as f:
        for idx, row in enumerate(rows):
            f.write(json.dumps(_build_row(idx, row)) + "\n")

    print(f"Wrote {len(rows)} rows to {output_path}")


if __name__ == "__main__":
    main()
+from responses_api_agents.verifiers_agent.app import VerifiersAgent + + +if __name__ == "__main__": + VerifiersAgent.run_webserver() diff --git a/responses_api_agents/prime_general_agent/requirements.txt b/responses_api_agents/prime_general_agent/requirements.txt new file mode 100644 index 000000000..326eb2471 --- /dev/null +++ b/responses_api_agents/prime_general_agent/requirements.txt @@ -0,0 +1,4 @@ +-e nemo-gym[dev] @ ../../ +verifiers @ git+https://github.com/PrimeIntellect-ai/verifiers.git@main +--extra-index-url https://hub.primeintellect.ai/primeintellect/simple/ +general-agent==0.1.4 diff --git a/responses_api_agents/prime_general_agent/tests/__init__.py b/responses_api_agents/prime_general_agent/tests/__init__.py new file mode 100644 index 000000000..e69de29bb