diff --git a/environments/reasoning_gym_claude_code/README.md b/environments/reasoning_gym_claude_code/README.md new file mode 100644 index 000000000..9a3dc0400 --- /dev/null +++ b/environments/reasoning_gym_claude_code/README.md @@ -0,0 +1,50 @@ +# Reasoning Gym — Claude Code Agent + +Claude Code agent harness for Reasoning Gym tasks. + +Source benchmark: https://github.com/open-thought/reasoning-gym + +## Configuration + +Set Anthropic credentials in `env.yaml`: + +```yaml +anthropic_api_key: sk-ant-... +anthropic_model_name: claude-sonnet-4-6 +anthropic_base_url: null +``` + +For a local vLLM or Ollama endpoint that serves the Anthropic Messages API: + +```yaml +anthropic_api_key: EMPTY +anthropic_model_name: Qwen/Qwen3-4B-Instruct-2507 +anthropic_base_url: http://localhost:8000 +``` + +`anthropic_base_url` should not include `/v1`. Claude Code appends `/v1/messages` itself. + +See [`responses_api_agents/claude_code_agent`](../../responses_api_agents/claude_code_agent/README.md) for the full set of agent options (`thinking`, `max_thinking_tokens`, `allowed_tools`, `disallowed_tools`, `max_turns`, `timeout`, etc.). + +## Quick start + +```bash +ng_run "+config_paths=[environments/reasoning_gym_claude_code/config.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=reasoning_gym_claude_code_agent \ + +input_jsonl_fpath=environments/reasoning_gym_claude_code/data/example.jsonl \ + +output_jsonl_fpath=results/reasoning_gym_claude_code_rollouts.jsonl +``` + +## Prepare training data + +```bash +python environments/reasoning_gym_claude_code/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_claude_code/data/train_knights_knaves.jsonl +``` + +See `prepare.py` for all available tasks, categories, and config options. + +Alternatively, a pre-built dataset is hosted on Hugging Face at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1). 
diff --git a/environments/reasoning_gym_claude_code/__init__.py b/environments/reasoning_gym_claude_code/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/reasoning_gym_claude_code/config.yaml b/environments/reasoning_gym_claude_code/config.yaml new file mode 100644 index 000000000..44871845c --- /dev/null +++ b/environments/reasoning_gym_claude_code/config.yaml @@ -0,0 +1,34 @@ +reasoning_gym: + resources_servers: + reasoning_gym: + entrypoint: app.py + domain: knowledge + verified: true + description: Claude Code agent harness for reasoning gym tasks + value: Evaluate model capabilities in the Claude Code agent harness + +reasoning_gym_claude_code_agent: + responses_api_agents: + claude_code_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: reasoning_gym + concurrency: 32 + model: ${anthropic_model_name} + anthropic_api_key: ${anthropic_api_key} + anthropic_base_url: ${anthropic_base_url} + max_turns: 30 + timeout: 300 + thinking: disabled + system_prompt: | + You are a precise reasoning assistant. You have access to Bash to run Python for calculations. + For every problem: think step by step, use code to verify when helpful, and state your final answer clearly. 
+ datasets: + - name: example + type: example + jsonl_fpath: environments/reasoning_gym_claude_code/data/example.jsonl + - name: train + type: train + jsonl_fpath: environments/reasoning_gym_claude_code/data/train_knights_knaves.jsonl + license: Apache 2.0 diff --git a/environments/reasoning_gym_claude_code/data/.gitignore b/environments/reasoning_gym_claude_code/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/reasoning_gym_claude_code/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/reasoning_gym_claude_code/data/example.jsonl b/environments/reasoning_gym_claude_code/data/example.jsonl new file mode 100644 index 000000000..533ef47c8 --- /dev/null +++ b/environments/reasoning_gym_claude_code/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? 
(Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? 
(Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? 
(Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} diff --git a/environments/reasoning_gym_claude_code/prepare.py b/environments/reasoning_gym_claude_code/prepare.py new file mode 100644 index 000000000..397adeb9d --- /dev/null +++ b/environments/reasoning_gym_claude_code/prepare.py @@ -0,0 +1,315 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +# Single task with default config +python environments/reasoning_gym_claude_code/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl + +# Multiple tasks (creates composite dataset with equal weights) +python environments/reasoning_gym_claude_code/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl + +# All tasks in a category +python environments/reasoning_gym_claude_code/prepare.py --category logic --size 500 --output data/train.jsonl + +# All tasks from all categories +python environments/reasoning_gym_claude_code/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl + +# Single task with custom config +python environments/reasoning_gym_claude_code/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl +""" + +import argparse +import json +import sys +from pathlib import Path + +import reasoning_gym +from reasoning_gym.composite import DatasetSpec + + +TASK_CATEGORIES = { + "logic": [ + "knights_knaves", + "syllogism", + "propositional_logic", + "zebra_puzzles", + "aiw", + "circuit_logic", + "self_reference", + ], + "arithmetic": [ + "basic_arithmetic", + "chain_sum", + "leg_counting", + "prime_factorization", + "gcd", + "lcm", + "time_intervals", + "calendar_arithmetic", + "dice", + "products", + "decimal_chain_sum", + "decimal_arithmetic", + "count_bits", + "bitwise_arithmetic", + "number_format", + "fraction_simplification", + "power_function", + "gsm_symbolic", + ], + "algebra": [ + "simple_equations", + "polynomial_equations", + "polynomial_multiplication", + "simple_integration", + "intermediate_integration", + "complex_arithmetic", + ], + "algorithmic": [ + "word_sorting", + "string_insertion", + "sentence_reordering", + "count_primes", + "binary_alternation", + "manipulate_matrix", + "pool_matrix", + "base_conversion", + "jugs", + "graph_color", + "spiral_matrix", + "cryptarithm", + "ab", + "spell_backward", + "letter_counting", + 
"letter_jumble", + "number_sorting", + "caesar_cipher", + "rotate_matrix", + "rotten_oranges", + "palindrome_generation", + "palindrome_partitioning", + "game_of_life", + "game_of_life_halting", + "group_anagrams", + "isomorphic_strings", + "string_synthesis", + "binary_matrix", + "word_sequence_reversal", + "number_filtering", + "word_ladder", + "string_manipulation", + "ransom_note", + "string_splitting", + ], + "cognition": [ + "figlet_font", + "needle_haystack", + "modulo_grid", + "number_sequence", + "rubiks_cube", + "color_cube_rotation", + "rectangle_count", + ], + "arc": [ + "arc_1d", + "arc_agi", + "rearc", + ], + "code": [ + "bf", + "codeio", + ], + "games": [ + "boxnet", + "countdown", + "emoji_mystery", + "futoshiki", + "kakurasu", + "knight_swap", + "mahjong_puzzle", + "maze", + "mini_sudoku", + "n_queens", + "puzzle24", + "rush_hour", + "sokoban", + "sudoku", + "survo", + "tower_of_hanoi", + "tsumego", + ], + "geometry": [ + "advanced_geometry", + "simple_geometry", + ], + "graphs": [ + "course_schedule", + "family_relationships", + "largest_island", + "quantum_lock", + "shortest_path", + ], + "induction": [ + "acre", + "list_functions", + ], + "probability": [ + "coin_flip", + ], +} + + +def format_entry_to_nemo_gym(entry: dict) -> dict: + return { + "responses_create_params": { + "input": [{"role": "user", "content": entry["question"]}], + }, + **entry, + "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_claude_code_agent"}, + } + + +def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]: + config = config or {} + config["size"] = size + config["seed"] = seed + + print(f"Creating {task_name} dataset with {size} samples (seed={seed})...") + try: + dataset = reasoning_gym.create_dataset(task_name, **config) + except Exception as e: + print(f"Error creating dataset {task_name}: {e}") + return [] + + entries = [] + for i in range(size): + try: + entry = dataset[i] + 
entries.append(format_entry_to_nemo_gym(entry)) + except Exception as e: + print(f"Error generating entry {i} for {task_name}: {e}") + continue + + print(f"Generated {len(entries)} entries for {task_name}") + return entries + + +def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]: + config = config or {} + + specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names] + + print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}") + print(f"Total samples: {size}, seed: {seed}") + + try: + dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs) + except Exception as e: + print(f"Error creating composite dataset: {e}") + return [] + + entries = [] + for i in range(size): + try: + entry = dataset[i] + entries.append(format_entry_to_nemo_gym(entry)) + except Exception as e: + print(f"Error generating entry {i}: {e}") + continue + + task_counts = {} + for entry in entries: + task = entry["metadata"]["source_dataset"] + task_counts[task] = task_counts.get(task, 0) + 1 + + print(f"\nGenerated {len(entries)} total entries") + print("Task distribution:") + for task, count in sorted(task_counts.items()): + print(f" {task}: {count}") + + return entries + + +def main(): + parser = argparse.ArgumentParser( + description="Create reasoning_gym datasets in NeMo Gym format", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + task_group = parser.add_mutually_exclusive_group(required=True) + task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)") + task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks") + task_group.add_argument( + "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)" + ) + task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all 
categories") + + parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)") + parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)") + parser.add_argument( + "--config", + type=str, + default="{}", + help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')", + ) + + parser.add_argument( + "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)" + ) + + args = parser.parse_args() + + try: + config = json.loads(args.config) + except json.JSONDecodeError as e: + print(f"Error parsing config JSON: {e}") + sys.exit(1) + + if args.task: + task_names = [args.task] + elif args.tasks: + task_names = [t.strip() for t in args.tasks.split(",")] + elif args.category: + task_names = TASK_CATEGORIES[args.category] + elif args.all_tasks: + task_names = [] + for category_tasks in TASK_CATEGORIES.values(): + task_names.extend(category_tasks) + else: + print("Error: Must specify --task, --tasks, --category, or --all-tasks") + sys.exit(1) + + if len(task_names) == 1: + entries = create_single_task_dataset(task_names[0], args.size, args.seed, config) + else: + entries = create_composite_dataset(task_names, args.size, args.seed, config) + + if not entries: + print("Error: No entries generated") + sys.exit(1) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w") as f: + for entry in entries: + f.write(json.dumps(entry) + "\n") + + print(f"\nDataset saved to {output_path}") + print(f"Total entries: {len(entries)}") + + +if __name__ == "__main__": + main() diff --git a/environments/reasoning_gym_hermes/README.md b/environments/reasoning_gym_hermes/README.md new file mode 100644 index 000000000..49ce6a23e --- /dev/null +++ b/environments/reasoning_gym_hermes/README.md @@ -0,0 +1,28 @@ +# Reasoning Gym — Hermes Agent + +Hermes Agent with terminal, file, code_execution, skills, todo 
toolsets on reasoning_gym tasks. + +Source benchmark: https://github.com/open-thought/reasoning-gym + +## Quick start + +```bash +ng_run "+config_paths=[environments/reasoning_gym_hermes/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=reasoning_gym_hermes_agent \ + +input_jsonl_fpath=environments/reasoning_gym_hermes/data/example.jsonl \ + +output_jsonl_fpath=results/reasoning_gym_hermes_rollouts.jsonl +``` + +## Prepare training data + +```bash +python environments/reasoning_gym_hermes/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_hermes/data/train_knights_knaves.jsonl +``` + +See `prepare.py` for all available tasks, categories, and config options. + +Alternatively, a pre-built dataset is hosted on Hugging Face at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1). diff --git a/environments/reasoning_gym_hermes/__init__.py b/environments/reasoning_gym_hermes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/reasoning_gym_hermes/config.yaml b/environments/reasoning_gym_hermes/config.yaml new file mode 100644 index 000000000..e0d8447de --- /dev/null +++ b/environments/reasoning_gym_hermes/config.yaml @@ -0,0 +1,35 @@ +reasoning_gym: + resources_servers: + reasoning_gym: + entrypoint: app.py + domain: knowledge + verified: false + description: Hermes Agent with terminal, file, code_execution, skills, todo toolsets on reasoning_gym tasks + value: Improve model reasoning capabilities in hermes agent harness + +reasoning_gym_hermes_agent: + responses_api_agents: + hermes_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: reasoning_gym + model_server: + type: responses_api_models + name: policy_model + enabled_toolsets: [terminal, file, code_execution, skills, todo] + max_turns: 30 + concurrency: 32 + terminal_backend: local + terminal_timeout: 
30 + system_prompt: | + You are a precise reasoning assistant. You have access to a terminal tool to run Python for calculations or verification. + Use it when computation would help. State your final answer clearly. + datasets: + - name: example + type: example + jsonl_fpath: environments/reasoning_gym_hermes/data/example.jsonl + - name: train + type: train + jsonl_fpath: environments/reasoning_gym_hermes/data/train_knights_knaves.jsonl + license: Apache 2.0 diff --git a/environments/reasoning_gym_hermes/data/.gitignore b/environments/reasoning_gym_hermes/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/reasoning_gym_hermes/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/reasoning_gym_hermes/data/example.jsonl b/environments/reasoning_gym_hermes/data/example.jsonl new file mode 100644 index 000000000..533ef47c8 --- /dev/null +++ b/environments/reasoning_gym_hermes/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? 
(Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? 
(Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? 
(Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} diff --git a/environments/reasoning_gym_hermes/prepare.py b/environments/reasoning_gym_hermes/prepare.py new file mode 100644 index 000000000..1d236e1f4 --- /dev/null +++ b/environments/reasoning_gym_hermes/prepare.py @@ -0,0 +1,315 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""
# Single task with default config
python environments/reasoning_gym_hermes/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl

# Multiple tasks (creates composite dataset with equal weights)
python environments/reasoning_gym_hermes/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl

# All tasks in a category
python environments/reasoning_gym_hermes/prepare.py --category logic --size 500 --output data/train.jsonl

# All tasks from all categories
python environments/reasoning_gym_hermes/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl

# Single task with custom config
python environments/reasoning_gym_hermes/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
"""

# NOTE: the module docstring above is reused verbatim as the argparse epilog
# (see main()), which is why it is a plain list of example invocations.

import argparse
import json
import sys
from collections import Counter
from pathlib import Path

import reasoning_gym
from reasoning_gym.composite import DatasetSpec


# reasoning_gym task names grouped by category; drives --category and --all-tasks.
TASK_CATEGORIES = {
    "logic": [
        "knights_knaves",
        "syllogism",
        "propositional_logic",
        "zebra_puzzles",
        "aiw",
        "circuit_logic",
        "self_reference",
    ],
    "arithmetic": [
        "basic_arithmetic",
        "chain_sum",
        "leg_counting",
        "prime_factorization",
        "gcd",
        "lcm",
        "time_intervals",
        "calendar_arithmetic",
        "dice",
        "products",
        "decimal_chain_sum",
        "decimal_arithmetic",
        "count_bits",
        "bitwise_arithmetic",
        "number_format",
        "fraction_simplification",
        "power_function",
        "gsm_symbolic",
    ],
    "algebra": [
        "simple_equations",
        "polynomial_equations",
        "polynomial_multiplication",
        "simple_integration",
        "intermediate_integration",
        "complex_arithmetic",
    ],
    "algorithmic": [
        "word_sorting",
        "string_insertion",
        "sentence_reordering",
        "count_primes",
        "binary_alternation",
        "manipulate_matrix",
        "pool_matrix",
        "base_conversion",
        "jugs",
        "graph_color",
        "spiral_matrix",
        "cryptarithm",
        "ab",
        "spell_backward",
        "letter_counting",
        "letter_jumble",
        "number_sorting",
        "caesar_cipher",
        "rotate_matrix",
        "rotten_oranges",
        "palindrome_generation",
        "palindrome_partitioning",
        "game_of_life",
        "game_of_life_halting",
        "group_anagrams",
        "isomorphic_strings",
        "string_synthesis",
        "binary_matrix",
        "word_sequence_reversal",
        "number_filtering",
        "word_ladder",
        "string_manipulation",
        "ransom_note",
        "string_splitting",
    ],
    "cognition": [
        "figlet_font",
        "needle_haystack",
        "modulo_grid",
        "number_sequence",
        "rubiks_cube",
        "color_cube_rotation",
        "rectangle_count",
    ],
    "arc": [
        "arc_1d",
        "arc_agi",
        "rearc",
    ],
    "code": [
        "bf",
        "codeio",
    ],
    "games": [
        "boxnet",
        "countdown",
        "emoji_mystery",
        "futoshiki",
        "kakurasu",
        "knight_swap",
        "mahjong_puzzle",
        "maze",
        "mini_sudoku",
        "n_queens",
        "puzzle24",
        "rush_hour",
        "sokoban",
        "sudoku",
        "survo",
        "tower_of_hanoi",
        "tsumego",
    ],
    "geometry": [
        "advanced_geometry",
        "simple_geometry",
    ],
    "graphs": [
        "course_schedule",
        "family_relationships",
        "largest_island",
        "quantum_lock",
        "shortest_path",
    ],
    "induction": [
        "acre",
        "list_functions",
    ],
    "probability": [
        "coin_flip",
    ],
}


def format_entry_to_nemo_gym(entry: dict) -> dict:
    """Wrap a reasoning_gym entry in the NeMo Gym input-row layout.

    The entry's ``question`` becomes the single user message under
    ``responses_create_params``. All original keys of ``entry`` are copied next
    to it, and ``agent_ref`` is written last so it always names this
    environment's agent.

    Raises:
        KeyError: if ``entry`` has no ``"question"`` key.
    """
    return {
        "responses_create_params": {
            "input": [{"role": "user", "content": entry["question"]}],
        },
        **entry,
        "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_hermes_agent"},
    }


def _collect_entries(dataset, size: int, error_suffix: str = "") -> list[dict]:
    """Format items ``0..size-1`` of ``dataset``, reporting and skipping any item that fails."""
    entries = []
    for i in range(size):
        try:
            entries.append(format_entry_to_nemo_gym(dataset[i]))
        except Exception as e:  # best effort: one bad sample must not abort the whole run
            print(f"Error generating entry {i}{error_suffix}: {e}")
    return entries


def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
    """Generate ``size`` formatted entries for a single reasoning_gym task.

    Args:
        task_name: Dataset name passed to ``reasoning_gym.create_dataset``.
        size: Number of samples to request and iterate over.
        seed: Seed forwarded to the dataset.
        config: Optional extra keyword arguments for the dataset. ``size`` and
            ``seed`` always override same-named keys. The dict is not modified.

    Returns:
        The formatted entries; an empty list if the dataset could not be created.
    """
    # Build a fresh kwargs dict so the caller's ``config`` is never mutated.
    params = {**(config or {}), "size": size, "seed": seed}

    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
    try:
        dataset = reasoning_gym.create_dataset(task_name, **params)
    except Exception as e:
        print(f"Error creating dataset {task_name}: {e}")
        return []

    entries = _collect_entries(dataset, size, f" for {task_name}")
    print(f"Generated {len(entries)} entries for {task_name}")
    return entries


def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
    """Generate ``size`` formatted entries from an equal-weight mix of tasks.

    Args:
        task_names: Tasks to combine; each gets weight 1.0.
        size: Total number of samples to request and iterate over.
        seed: Seed forwarded to the composite dataset.
        config: Optional mapping of task name to that task's config dict; tasks
            without an entry use an empty config.

    Returns:
        The formatted entries; an empty list if the dataset could not be created.
    """
    config = config or {}

    specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names]

    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
    print(f"Total samples: {size}, seed: {seed}")

    try:
        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
    except Exception as e:
        print(f"Error creating composite dataset: {e}")
        return []

    entries = _collect_entries(dataset, size)

    # Report how many samples each underlying task contributed.
    task_counts = Counter(entry["metadata"]["source_dataset"] for entry in entries)

    print(f"\nGenerated {len(entries)} total entries")
    print("Task distribution:")
    for task, count in sorted(task_counts.items()):
        print(f"  {task}: {count}")

    return entries


def main():
    """Parse CLI arguments, generate the requested dataset and write it as JSONL.

    Exits with status 1 when the config is not a JSON object, no task is
    selected, or no entries could be generated.
    """
    parser = argparse.ArgumentParser(
        description="Create reasoning_gym datasets in NeMo Gym format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    task_group = parser.add_mutually_exclusive_group(required=True)
    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
    task_group.add_argument(
        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
    )
    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")

    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
    parser.add_argument(
        "--config",
        type=str,
        default="{}",
        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
    )

    parser.add_argument(
        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
    )

    args = parser.parse_args()

    try:
        config = json.loads(args.config)
    except json.JSONDecodeError as e:
        print(f"Error parsing config JSON: {e}")
        sys.exit(1)
    # Valid JSON that is not an object (e.g. "[1]") would crash later on
    # item assignment / .get(); reject it here with a clear message.
    if not isinstance(config, dict):
        print("Error parsing config JSON: expected a JSON object")
        sys.exit(1)

    if args.task:
        task_names = [args.task]
    elif args.tasks:
        # Ignore empty items produced by stray or trailing commas.
        task_names = [name for name in (t.strip() for t in args.tasks.split(",")) if name]
    elif args.category:
        task_names = list(TASK_CATEGORIES[args.category])
    elif args.all_tasks:
        task_names = [task for category_tasks in TASK_CATEGORIES.values() for task in category_tasks]
    else:
        task_names = []

    if not task_names:
        print("Error: Must specify --task, --tasks, --category, or --all-tasks")
        sys.exit(1)

    if len(task_names) == 1:
        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
    else:
        entries = create_composite_dataset(task_names, args.size, args.seed, config)

    if not entries:
        print("Error: No entries generated")
        sys.exit(1)

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        for entry in entries:
            f.write(json.dumps(entry) + "\n")

    print(f"\nDataset saved to {output_path}")
    print(f"Total entries: {len(entries)}")


if __name__ == "__main__":
    main()
compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures. + +Source benchmark: https://github.com/open-thought/reasoning-gym + +## Quick start + +```bash +ng_run "+config_paths=[environments/reasoning_gym_orchestrator/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=reasoning_gym_orchestrator_agent \ + +input_jsonl_fpath=environments/reasoning_gym_orchestrator/data/example.jsonl \ + +output_jsonl_fpath=results/reasoning_gym_orchestrator_rollouts.jsonl +``` + +## Prepare training data + +```bash +python environments/reasoning_gym_orchestrator/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_orchestrator/data/train_knights_knaves.jsonl +``` + +See `prepare.py` for all available tasks, categories, and config options. + +Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1). 
diff --git a/environments/reasoning_gym_orchestrator/__init__.py b/environments/reasoning_gym_orchestrator/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/reasoning_gym_orchestrator/config.yaml b/environments/reasoning_gym_orchestrator/config.yaml new file mode 100644 index 000000000..f01db3f9d --- /dev/null +++ b/environments/reasoning_gym_orchestrator/config.yaml @@ -0,0 +1,27 @@ +reasoning_gym: + resources_servers: + reasoning_gym: + entrypoint: app.py + domain: knowledge + description: LangGraph orchestrator agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures + value: Iterative test time scaling for improved performance in reasoning tasks + verified: false + +reasoning_gym_orchestrator_agent: + responses_api_agents: + langgraph_agent: + entrypoint: orchestrator_agent.py + resources_server: + type: resources_servers + name: reasoning_gym + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: example + type: example + jsonl_fpath: environments/reasoning_gym_orchestrator/data/example.jsonl + - name: train + type: train + jsonl_fpath: environments/reasoning_gym_orchestrator/data/train_knights_knaves.jsonl + license: Apache 2.0 diff --git a/environments/reasoning_gym_orchestrator/data/.gitignore b/environments/reasoning_gym_orchestrator/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/reasoning_gym_orchestrator/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/reasoning_gym_orchestrator/data/example.jsonl b/environments/reasoning_gym_orchestrator/data/example.jsonl new file mode 100644 index 000000000..533ef47c8 --- /dev/null +++ b/environments/reasoning_gym_orchestrator/data/example.jsonl @@ -0,0 
+1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. 
\"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? 
(Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? 
(Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} diff --git a/environments/reasoning_gym_orchestrator/prepare.py b/environments/reasoning_gym_orchestrator/prepare.py new file mode 100644 index 000000000..fdc71a1f1 --- /dev/null +++ b/environments/reasoning_gym_orchestrator/prepare.py @@ -0,0 +1,315 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""
# Single task with default config
python environments/reasoning_gym_orchestrator/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl

# Multiple tasks (creates composite dataset with equal weights)
python environments/reasoning_gym_orchestrator/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl

# All tasks in a category
python environments/reasoning_gym_orchestrator/prepare.py --category logic --size 500 --output data/train.jsonl

# All tasks from all categories
python environments/reasoning_gym_orchestrator/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl

# Single task with custom config
python environments/reasoning_gym_orchestrator/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
"""

# NOTE: the module docstring above is reused verbatim as the argparse epilog
# (see main()), which is why it is a plain list of example invocations.

import argparse
import json
import sys
from collections import Counter
from pathlib import Path

import reasoning_gym
from reasoning_gym.composite import DatasetSpec


# reasoning_gym task names grouped by category; drives --category and --all-tasks.
TASK_CATEGORIES = {
    "logic": [
        "knights_knaves",
        "syllogism",
        "propositional_logic",
        "zebra_puzzles",
        "aiw",
        "circuit_logic",
        "self_reference",
    ],
    "arithmetic": [
        "basic_arithmetic",
        "chain_sum",
        "leg_counting",
        "prime_factorization",
        "gcd",
        "lcm",
        "time_intervals",
        "calendar_arithmetic",
        "dice",
        "products",
        "decimal_chain_sum",
        "decimal_arithmetic",
        "count_bits",
        "bitwise_arithmetic",
        "number_format",
        "fraction_simplification",
        "power_function",
        "gsm_symbolic",
    ],
    "algebra": [
        "simple_equations",
        "polynomial_equations",
        "polynomial_multiplication",
        "simple_integration",
        "intermediate_integration",
        "complex_arithmetic",
    ],
    "algorithmic": [
        "word_sorting",
        "string_insertion",
        "sentence_reordering",
        "count_primes",
        "binary_alternation",
        "manipulate_matrix",
        "pool_matrix",
        "base_conversion",
        "jugs",
        "graph_color",
        "spiral_matrix",
        "cryptarithm",
        "ab",
        "spell_backward",
        "letter_counting",
        "letter_jumble",
        "number_sorting",
        "caesar_cipher",
        "rotate_matrix",
        "rotten_oranges",
        "palindrome_generation",
        "palindrome_partitioning",
        "game_of_life",
        "game_of_life_halting",
        "group_anagrams",
        "isomorphic_strings",
        "string_synthesis",
        "binary_matrix",
        "word_sequence_reversal",
        "number_filtering",
        "word_ladder",
        "string_manipulation",
        "ransom_note",
        "string_splitting",
    ],
    "cognition": [
        "figlet_font",
        "needle_haystack",
        "modulo_grid",
        "number_sequence",
        "rubiks_cube",
        "color_cube_rotation",
        "rectangle_count",
    ],
    "arc": [
        "arc_1d",
        "arc_agi",
        "rearc",
    ],
    "code": [
        "bf",
        "codeio",
    ],
    "games": [
        "boxnet",
        "countdown",
        "emoji_mystery",
        "futoshiki",
        "kakurasu",
        "knight_swap",
        "mahjong_puzzle",
        "maze",
        "mini_sudoku",
        "n_queens",
        "puzzle24",
        "rush_hour",
        "sokoban",
        "sudoku",
        "survo",
        "tower_of_hanoi",
        "tsumego",
    ],
    "geometry": [
        "advanced_geometry",
        "simple_geometry",
    ],
    "graphs": [
        "course_schedule",
        "family_relationships",
        "largest_island",
        "quantum_lock",
        "shortest_path",
    ],
    "induction": [
        "acre",
        "list_functions",
    ],
    "probability": [
        "coin_flip",
    ],
}


def format_entry_to_nemo_gym(entry: dict) -> dict:
    """Wrap a reasoning_gym entry in the NeMo Gym input-row layout.

    The entry's ``question`` becomes the single user message under
    ``responses_create_params``. All original keys of ``entry`` are copied next
    to it, and ``agent_ref`` is written last so it always names this
    environment's agent.

    Raises:
        KeyError: if ``entry`` has no ``"question"`` key.
    """
    return {
        "responses_create_params": {
            "input": [{"role": "user", "content": entry["question"]}],
        },
        **entry,
        "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_orchestrator_agent"},
    }


def _collect_entries(dataset, size: int, error_suffix: str = "") -> list[dict]:
    """Format items ``0..size-1`` of ``dataset``, reporting and skipping any item that fails."""
    entries = []
    for i in range(size):
        try:
            entries.append(format_entry_to_nemo_gym(dataset[i]))
        except Exception as e:  # best effort: one bad sample must not abort the whole run
            print(f"Error generating entry {i}{error_suffix}: {e}")
    return entries


def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
    """Generate ``size`` formatted entries for a single reasoning_gym task.

    Args:
        task_name: Dataset name passed to ``reasoning_gym.create_dataset``.
        size: Number of samples to request and iterate over.
        seed: Seed forwarded to the dataset.
        config: Optional extra keyword arguments for the dataset. ``size`` and
            ``seed`` always override same-named keys. The dict is not modified.

    Returns:
        The formatted entries; an empty list if the dataset could not be created.
    """
    # Build a fresh kwargs dict so the caller's ``config`` is never mutated.
    params = {**(config or {}), "size": size, "seed": seed}

    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
    try:
        dataset = reasoning_gym.create_dataset(task_name, **params)
    except Exception as e:
        print(f"Error creating dataset {task_name}: {e}")
        return []

    entries = _collect_entries(dataset, size, f" for {task_name}")
    print(f"Generated {len(entries)} entries for {task_name}")
    return entries


def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
    """Generate ``size`` formatted entries from an equal-weight mix of tasks.

    Args:
        task_names: Tasks to combine; each gets weight 1.0.
        size: Total number of samples to request and iterate over.
        seed: Seed forwarded to the composite dataset.
        config: Optional mapping of task name to that task's config dict; tasks
            without an entry use an empty config.

    Returns:
        The formatted entries; an empty list if the dataset could not be created.
    """
    config = config or {}

    specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names]

    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
    print(f"Total samples: {size}, seed: {seed}")

    try:
        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
    except Exception as e:
        print(f"Error creating composite dataset: {e}")
        return []

    entries = _collect_entries(dataset, size)

    # Report how many samples each underlying task contributed.
    task_counts = Counter(entry["metadata"]["source_dataset"] for entry in entries)

    print(f"\nGenerated {len(entries)} total entries")
    print("Task distribution:")
    for task, count in sorted(task_counts.items()):
        print(f"  {task}: {count}")

    return entries


def main():
    """Parse CLI arguments, generate the requested dataset and write it as JSONL.

    Exits with status 1 when the config is not a JSON object, no task is
    selected, or no entries could be generated.
    """
    parser = argparse.ArgumentParser(
        description="Create reasoning_gym datasets in NeMo Gym format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    task_group = parser.add_mutually_exclusive_group(required=True)
    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
    task_group.add_argument(
        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
    )
    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")

    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
    parser.add_argument(
        "--config",
        type=str,
        default="{}",
        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
    )

    parser.add_argument(
        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
    )

    args = parser.parse_args()

    try:
        config = json.loads(args.config)
    except json.JSONDecodeError as e:
        print(f"Error parsing config JSON: {e}")
        sys.exit(1)
    # Valid JSON that is not an object (e.g. "[1]") would crash later on
    # item assignment / .get(); reject it here with a clear message.
    if not isinstance(config, dict):
        print("Error parsing config JSON: expected a JSON object")
        sys.exit(1)

    if args.task:
        task_names = [args.task]
    elif args.tasks:
        # Ignore empty items produced by stray or trailing commas.
        task_names = [name for name in (t.strip() for t in args.tasks.split(",")) if name]
    elif args.category:
        task_names = list(TASK_CATEGORIES[args.category])
    elif args.all_tasks:
        task_names = [task for category_tasks in TASK_CATEGORIES.values() for task in category_tasks]
    else:
        task_names = []

    if not task_names:
        print("Error: Must specify --task, --tasks, --category, or --all-tasks")
        sys.exit(1)

    if len(task_names) == 1:
        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
    else:
        entries = create_composite_dataset(task_names, args.size, args.seed, config)

    if not entries:
        print("Error: No entries generated")
        sys.exit(1)

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        for entry in entries:
            f.write(json.dumps(entry) + "\n")

    print(f"\nDataset saved to {output_path}")
    print(f"Total entries: {len(entries)}")


if __name__ == "__main__":
    main()
parallel thinking agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures. + +Source benchmark: https://github.com/open-thought/reasoning-gym + +## Quick start + +```bash +ng_run "+config_paths=[environments/reasoning_gym_parallel_thinking/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=reasoning_gym_parallel_thinking_agent \ + +input_jsonl_fpath=environments/reasoning_gym_parallel_thinking/data/example.jsonl \ + +output_jsonl_fpath=results/reasoning_gym_parallel_thinking_rollouts.jsonl +``` + +## Prepare training data + +```bash +python environments/reasoning_gym_parallel_thinking/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_parallel_thinking/data/train_knights_knaves.jsonl +``` + +See `prepare.py` for all available tasks, categories, and config options. + +Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1). 
diff --git a/environments/reasoning_gym_parallel_thinking/__init__.py b/environments/reasoning_gym_parallel_thinking/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/reasoning_gym_parallel_thinking/config.yaml b/environments/reasoning_gym_parallel_thinking/config.yaml new file mode 100644 index 000000000..fc6f0bb78 --- /dev/null +++ b/environments/reasoning_gym_parallel_thinking/config.yaml @@ -0,0 +1,27 @@ +reasoning_gym: + resources_servers: + reasoning_gym: + entrypoint: app.py + domain: knowledge + description: LangGraph parallel thinking agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures + value: Iterative test time scaling for improved performance in reasoning tasks + verified: false + +reasoning_gym_parallel_thinking_agent: + responses_api_agents: + langgraph_agent: + entrypoint: parallel_thinking_agent.py + resources_server: + type: resources_servers + name: reasoning_gym + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: example + type: example + jsonl_fpath: environments/reasoning_gym_parallel_thinking/data/example.jsonl + - name: train + type: train + jsonl_fpath: environments/reasoning_gym_parallel_thinking/data/train_knights_knaves.jsonl + license: Apache 2.0 diff --git a/environments/reasoning_gym_parallel_thinking/data/.gitignore b/environments/reasoning_gym_parallel_thinking/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/reasoning_gym_parallel_thinking/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/reasoning_gym_parallel_thinking/data/example.jsonl b/environments/reasoning_gym_parallel_thinking/data/example.jsonl new file mode 100644 index 000000000..533ef47c8 --- /dev/null 
+++ b/environments/reasoning_gym_parallel_thinking/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. 
You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? 
(Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? 
(Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} diff --git a/environments/reasoning_gym_parallel_thinking/prepare.py b/environments/reasoning_gym_parallel_thinking/prepare.py new file mode 100644 index 000000000..f716d9ef7 --- /dev/null +++ b/environments/reasoning_gym_parallel_thinking/prepare.py @@ -0,0 +1,315 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +# Single task with default config +python environments/reasoning_gym_parallel_thinking/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl + +# Multiple tasks (creates composite dataset with equal weights) +python environments/reasoning_gym_parallel_thinking/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl + +# All tasks in a category +python environments/reasoning_gym_parallel_thinking/prepare.py --category logic --size 500 --output data/train.jsonl + +# All tasks from all categories +python environments/reasoning_gym_parallel_thinking/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl + +# Single task with custom config +python environments/reasoning_gym_parallel_thinking/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl +""" + +import argparse +import json +import sys +from pathlib import Path + +import reasoning_gym +from reasoning_gym.composite import DatasetSpec + + +TASK_CATEGORIES = { + "logic": [ + "knights_knaves", + "syllogism", + "propositional_logic", + "zebra_puzzles", + "aiw", + "circuit_logic", + "self_reference", + ], + "arithmetic": [ + "basic_arithmetic", + "chain_sum", + "leg_counting", + "prime_factorization", + "gcd", + "lcm", + "time_intervals", + "calendar_arithmetic", + "dice", + "products", + "decimal_chain_sum", + "decimal_arithmetic", + "count_bits", + "bitwise_arithmetic", + "number_format", + "fraction_simplification", + "power_function", + "gsm_symbolic", + ], + "algebra": [ + "simple_equations", + "polynomial_equations", + "polynomial_multiplication", + "simple_integration", + "intermediate_integration", + "complex_arithmetic", + ], + "algorithmic": [ + "word_sorting", + "string_insertion", + "sentence_reordering", + "count_primes", + "binary_alternation", + "manipulate_matrix", + "pool_matrix", + "base_conversion", + "jugs", + "graph_color", + "spiral_matrix", + "cryptarithm", + "ab", + "spell_backward", + 
"letter_counting", + "letter_jumble", + "number_sorting", + "caesar_cipher", + "rotate_matrix", + "rotten_oranges", + "palindrome_generation", + "palindrome_partitioning", + "game_of_life", + "game_of_life_halting", + "group_anagrams", + "isomorphic_strings", + "string_synthesis", + "binary_matrix", + "word_sequence_reversal", + "number_filtering", + "word_ladder", + "string_manipulation", + "ransom_note", + "string_splitting", + ], + "cognition": [ + "figlet_font", + "needle_haystack", + "modulo_grid", + "number_sequence", + "rubiks_cube", + "color_cube_rotation", + "rectangle_count", + ], + "arc": [ + "arc_1d", + "arc_agi", + "rearc", + ], + "code": [ + "bf", + "codeio", + ], + "games": [ + "boxnet", + "countdown", + "emoji_mystery", + "futoshiki", + "kakurasu", + "knight_swap", + "mahjong_puzzle", + "maze", + "mini_sudoku", + "n_queens", + "puzzle24", + "rush_hour", + "sokoban", + "sudoku", + "survo", + "tower_of_hanoi", + "tsumego", + ], + "geometry": [ + "advanced_geometry", + "simple_geometry", + ], + "graphs": [ + "course_schedule", + "family_relationships", + "largest_island", + "quantum_lock", + "shortest_path", + ], + "induction": [ + "acre", + "list_functions", + ], + "probability": [ + "coin_flip", + ], +} + + +def format_entry_to_nemo_gym(entry: dict) -> dict: + return { + "responses_create_params": { + "input": [{"role": "user", "content": entry["question"]}], + }, + **entry, + "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_parallel_thinking_agent"}, + } + + +def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]: + config = config or {} + config["size"] = size + config["seed"] = seed + + print(f"Creating {task_name} dataset with {size} samples (seed={seed})...") + try: + dataset = reasoning_gym.create_dataset(task_name, **config) + except Exception as e: + print(f"Error creating dataset {task_name}: {e}") + return [] + + entries = [] + for i in range(size): + try: + entry = 
dataset[i] + entries.append(format_entry_to_nemo_gym(entry)) + except Exception as e: + print(f"Error generating entry {i} for {task_name}: {e}") + continue + + print(f"Generated {len(entries)} entries for {task_name}") + return entries + + +def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]: + config = config or {} + + specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names] + + print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}") + print(f"Total samples: {size}, seed: {seed}") + + try: + dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs) + except Exception as e: + print(f"Error creating composite dataset: {e}") + return [] + + entries = [] + for i in range(size): + try: + entry = dataset[i] + entries.append(format_entry_to_nemo_gym(entry)) + except Exception as e: + print(f"Error generating entry {i}: {e}") + continue + + task_counts = {} + for entry in entries: + task = entry["metadata"]["source_dataset"] + task_counts[task] = task_counts.get(task, 0) + 1 + + print(f"\nGenerated {len(entries)} total entries") + print("Task distribution:") + for task, count in sorted(task_counts.items()): + print(f" {task}: {count}") + + return entries + + +def main(): + parser = argparse.ArgumentParser( + description="Create reasoning_gym datasets in NeMo Gym format", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + task_group = parser.add_mutually_exclusive_group(required=True) + task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)") + task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks") + task_group.add_argument( + "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)" + ) + task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks 
from all categories") + + parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)") + parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)") + parser.add_argument( + "--config", + type=str, + default="{}", + help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')", + ) + + parser.add_argument( + "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)" + ) + + args = parser.parse_args() + + try: + config = json.loads(args.config) + except json.JSONDecodeError as e: + print(f"Error parsing config JSON: {e}") + sys.exit(1) + + if args.task: + task_names = [args.task] + elif args.tasks: + task_names = [t.strip() for t in args.tasks.split(",")] + elif args.category: + task_names = TASK_CATEGORIES[args.category] + elif args.all_tasks: + task_names = [] + for category_tasks in TASK_CATEGORIES.values(): + task_names.extend(category_tasks) + else: + print("Error: Must specify --task, --tasks, --category, or --all-tasks") + sys.exit(1) + + if len(task_names) == 1: + entries = create_single_task_dataset(task_names[0], args.size, args.seed, config) + else: + entries = create_composite_dataset(task_names, args.size, args.seed, config) + + if not entries: + print("Error: No entries generated") + sys.exit(1) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w") as f: + for entry in entries: + f.write(json.dumps(entry) + "\n") + + print(f"\nDataset saved to {output_path}") + print(f"Total entries: {len(entries)}") + + +if __name__ == "__main__": + main() diff --git a/environments/reasoning_gym_reflection/README.md b/environments/reasoning_gym_reflection/README.md new file mode 100644 index 000000000..64f010ca2 --- /dev/null +++ b/environments/reasoning_gym_reflection/README.md @@ -0,0 +1,28 @@ +# Reasoning Gym — LangGraph Reflection Agent + +LangGraph reflection agent 
compatible with resource servers that do not use tools. It provides iterative reflection for diverse agent training data and test-time scaling, and can be extended to use tools or other agent architectures. + +Source benchmark: https://github.com/open-thought/reasoning-gym + +## Quick start + +```bash +ng_run "+config_paths=[environments/reasoning_gym_reflection/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=reasoning_gym_reflection_agent \ + +input_jsonl_fpath=environments/reasoning_gym_reflection/data/example.jsonl \ + +output_jsonl_fpath=results/reasoning_gym_reflection_rollouts.jsonl +``` + +## Prepare training data + +```bash +python environments/reasoning_gym_reflection/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_reflection/data/train_knights_knaves.jsonl +``` + +See `prepare.py` for all available tasks, categories, and config options. + +Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1). 
diff --git a/environments/reasoning_gym_reflection/__init__.py b/environments/reasoning_gym_reflection/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/reasoning_gym_reflection/config.yaml b/environments/reasoning_gym_reflection/config.yaml new file mode 100644 index 000000000..9c95a502b --- /dev/null +++ b/environments/reasoning_gym_reflection/config.yaml @@ -0,0 +1,28 @@ +reasoning_gym: + resources_servers: + reasoning_gym: + entrypoint: app.py + domain: knowledge + description: LangGraph reflection agent compatible with resource servers that do not use tools; provides iterative reflection for diverse agent training data and test time scaling, extensible to use tools or other agent architectures + value: Iterative test time scaling for improved performance in reasoning tasks + verified: false + +reasoning_gym_reflection_agent: + responses_api_agents: + langgraph_agent: + entrypoint: reflection_agent.py + resources_server: + type: resources_servers + name: reasoning_gym + model_server: + type: responses_api_models + name: policy_model + max_reflections: 2 + datasets: + - name: example + type: example + jsonl_fpath: environments/reasoning_gym_reflection/data/example.jsonl + - name: train + type: train + jsonl_fpath: environments/reasoning_gym_reflection/data/train_knights_knaves.jsonl + license: Apache 2.0 diff --git a/environments/reasoning_gym_reflection/data/.gitignore b/environments/reasoning_gym_reflection/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/reasoning_gym_reflection/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/reasoning_gym_reflection/data/example.jsonl b/environments/reasoning_gym_reflection/data/example.jsonl new file mode 100644 index 000000000..533ef47c8 --- /dev/null +++ b/environments/reasoning_gym_reflection/data/example.jsonl @@ -0,0 +1,5 
@@ +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. 
\"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? 
(Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? 
(Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} diff --git a/environments/reasoning_gym_reflection/prepare.py b/environments/reasoning_gym_reflection/prepare.py new file mode 100644 index 000000000..2a5215813 --- /dev/null +++ b/environments/reasoning_gym_reflection/prepare.py @@ -0,0 +1,315 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +# Single task with default config +python environments/reasoning_gym_reflection/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl + +# Multiple tasks (creates composite dataset with equal weights) +python environments/reasoning_gym_reflection/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl + +# All tasks in a category +python environments/reasoning_gym_reflection/prepare.py --category logic --size 500 --output data/train.jsonl + +# All tasks from all categories +python environments/reasoning_gym_reflection/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl + +# Single task with custom config +python environments/reasoning_gym_reflection/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl +""" + +import argparse +import json +import sys +from pathlib import Path + +import reasoning_gym +from reasoning_gym.composite import DatasetSpec + + +TASK_CATEGORIES = { + "logic": [ + "knights_knaves", + "syllogism", + "propositional_logic", + "zebra_puzzles", + "aiw", + "circuit_logic", + "self_reference", + ], + "arithmetic": [ + "basic_arithmetic", + "chain_sum", + "leg_counting", + "prime_factorization", + "gcd", + "lcm", + "time_intervals", + "calendar_arithmetic", + "dice", + "products", + "decimal_chain_sum", + "decimal_arithmetic", + "count_bits", + "bitwise_arithmetic", + "number_format", + "fraction_simplification", + "power_function", + "gsm_symbolic", + ], + "algebra": [ + "simple_equations", + "polynomial_equations", + "polynomial_multiplication", + "simple_integration", + "intermediate_integration", + "complex_arithmetic", + ], + "algorithmic": [ + "word_sorting", + "string_insertion", + "sentence_reordering", + "count_primes", + "binary_alternation", + "manipulate_matrix", + "pool_matrix", + "base_conversion", + "jugs", + "graph_color", + "spiral_matrix", + "cryptarithm", + "ab", + "spell_backward", + "letter_counting", + 
"letter_jumble", + "number_sorting", + "caesar_cipher", + "rotate_matrix", + "rotten_oranges", + "palindrome_generation", + "palindrome_partitioning", + "game_of_life", + "game_of_life_halting", + "group_anagrams", + "isomorphic_strings", + "string_synthesis", + "binary_matrix", + "word_sequence_reversal", + "number_filtering", + "word_ladder", + "string_manipulation", + "ransom_note", + "string_splitting", + ], + "cognition": [ + "figlet_font", + "needle_haystack", + "modulo_grid", + "number_sequence", + "rubiks_cube", + "color_cube_rotation", + "rectangle_count", + ], + "arc": [ + "arc_1d", + "arc_agi", + "rearc", + ], + "code": [ + "bf", + "codeio", + ], + "games": [ + "boxnet", + "countdown", + "emoji_mystery", + "futoshiki", + "kakurasu", + "knight_swap", + "mahjong_puzzle", + "maze", + "mini_sudoku", + "n_queens", + "puzzle24", + "rush_hour", + "sokoban", + "sudoku", + "survo", + "tower_of_hanoi", + "tsumego", + ], + "geometry": [ + "advanced_geometry", + "simple_geometry", + ], + "graphs": [ + "course_schedule", + "family_relationships", + "largest_island", + "quantum_lock", + "shortest_path", + ], + "induction": [ + "acre", + "list_functions", + ], + "probability": [ + "coin_flip", + ], +} + + +def format_entry_to_nemo_gym(entry: dict) -> dict: + return { + "responses_create_params": { + "input": [{"role": "user", "content": entry["question"]}], + }, + **entry, + "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_reflection_agent"}, + } + + +def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]: + config = config or {} + config["size"] = size + config["seed"] = seed + + print(f"Creating {task_name} dataset with {size} samples (seed={seed})...") + try: + dataset = reasoning_gym.create_dataset(task_name, **config) + except Exception as e: + print(f"Error creating dataset {task_name}: {e}") + return [] + + entries = [] + for i in range(size): + try: + entry = dataset[i] + 
entries.append(format_entry_to_nemo_gym(entry)) + except Exception as e: + print(f"Error generating entry {i} for {task_name}: {e}") + continue + + print(f"Generated {len(entries)} entries for {task_name}") + return entries + + +def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]: + config = config or {} + + specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names] + + print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}") + print(f"Total samples: {size}, seed: {seed}") + + try: + dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs) + except Exception as e: + print(f"Error creating composite dataset: {e}") + return [] + + entries = [] + for i in range(size): + try: + entry = dataset[i] + entries.append(format_entry_to_nemo_gym(entry)) + except Exception as e: + print(f"Error generating entry {i}: {e}") + continue + + task_counts = {} + for entry in entries: + task = entry["metadata"]["source_dataset"] + task_counts[task] = task_counts.get(task, 0) + 1 + + print(f"\nGenerated {len(entries)} total entries") + print("Task distribution:") + for task, count in sorted(task_counts.items()): + print(f" {task}: {count}") + + return entries + + +def main(): + parser = argparse.ArgumentParser( + description="Create reasoning_gym datasets in NeMo Gym format", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=__doc__, + ) + + task_group = parser.add_mutually_exclusive_group(required=True) + task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)") + task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks") + task_group.add_argument( + "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)" + ) + task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all 
categories") + + parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)") + parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)") + parser.add_argument( + "--config", + type=str, + default="{}", + help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')", + ) + + parser.add_argument( + "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)" + ) + + args = parser.parse_args() + + try: + config = json.loads(args.config) + except json.JSONDecodeError as e: + print(f"Error parsing config JSON: {e}") + sys.exit(1) + + if args.task: + task_names = [args.task] + elif args.tasks: + task_names = [t.strip() for t in args.tasks.split(",")] + elif args.category: + task_names = TASK_CATEGORIES[args.category] + elif args.all_tasks: + task_names = [] + for category_tasks in TASK_CATEGORIES.values(): + task_names.extend(category_tasks) + else: + print("Error: Must specify --task, --tasks, --category, or --all-tasks") + sys.exit(1) + + if len(task_names) == 1: + entries = create_single_task_dataset(task_names[0], args.size, args.seed, config) + else: + entries = create_composite_dataset(task_names, args.size, args.seed, config) + + if not entries: + print("Error: No entries generated") + sys.exit(1) + + output_path = Path(args.output) + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, "w") as f: + for entry in entries: + f.write(json.dumps(entry) + "\n") + + print(f"\nDataset saved to {output_path}") + print(f"Total entries: {len(entries)}") + + +if __name__ == "__main__": + main() diff --git a/environments/reasoning_gym_rewoo/README.md b/environments/reasoning_gym_rewoo/README.md new file mode 100644 index 000000000..5b986aba5 --- /dev/null +++ b/environments/reasoning_gym_rewoo/README.md @@ -0,0 +1,28 @@ +# Reasoning Gym — LangGraph ReWOO Agent + +LangGraph ReWOO agent compatible with resource servers that 
do not use tools. It enables diverse agent training data and test-time scaling compared with a simple agent, and can be extended to use tools or other agent architectures. + +Source benchmark: https://github.com/open-thought/reasoning-gym + +## Quick start + +```bash +ng_run "+config_paths=[environments/reasoning_gym_rewoo/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]" +``` + +```bash +ng_collect_rollouts \ + +agent_name=reasoning_gym_rewoo_agent \ + +input_jsonl_fpath=environments/reasoning_gym_rewoo/data/example.jsonl \ + +output_jsonl_fpath=results/reasoning_gym_rewoo_rollouts.jsonl +``` + +## Prepare training data + +```bash +python environments/reasoning_gym_rewoo/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_rewoo/data/train_knights_knaves.jsonl +``` + +See `prepare.py` for all available tasks, categories, and config options. + +Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1). 
diff --git a/environments/reasoning_gym_rewoo/__init__.py b/environments/reasoning_gym_rewoo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/reasoning_gym_rewoo/config.yaml b/environments/reasoning_gym_rewoo/config.yaml new file mode 100644 index 000000000..7db7b87a8 --- /dev/null +++ b/environments/reasoning_gym_rewoo/config.yaml @@ -0,0 +1,27 @@ +reasoning_gym: + resources_servers: + reasoning_gym: + entrypoint: app.py + domain: knowledge + description: LangGraph ReWOO agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures + value: Iterative test time scaling for improved performance in reasoning tasks + verified: false + +reasoning_gym_rewoo_agent: + responses_api_agents: + langgraph_agent: + entrypoint: rewoo_agent.py + resources_server: + type: resources_servers + name: reasoning_gym + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: example + type: example + jsonl_fpath: environments/reasoning_gym_rewoo/data/example.jsonl + - name: train + type: train + jsonl_fpath: environments/reasoning_gym_rewoo/data/train_knights_knaves.jsonl + license: Apache 2.0 diff --git a/environments/reasoning_gym_rewoo/data/.gitignore b/environments/reasoning_gym_rewoo/data/.gitignore new file mode 100644 index 000000000..4424b6fde --- /dev/null +++ b/environments/reasoning_gym_rewoo/data/.gitignore @@ -0,0 +1,5 @@ +*train.jsonl +*validation.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/reasoning_gym_rewoo/data/example.jsonl b/environments/reasoning_gym_rewoo/data/example.jsonl new file mode 100644 index 000000000..533ef47c8 --- /dev/null +++ b/environments/reasoning_gym_rewoo/data/example.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited 
only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". 
So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? 
(Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? 
(Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} +{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? 
(Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}} diff --git a/environments/reasoning_gym_rewoo/prepare.py b/environments/reasoning_gym_rewoo/prepare.py new file mode 100644 index 000000000..82624393a --- /dev/null +++ b/environments/reasoning_gym_rewoo/prepare.py @@ -0,0 +1,315 @@ +# Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""
# Single task with default config
python environments/reasoning_gym_rewoo/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl

# Multiple tasks (creates composite dataset with equal weights)
python environments/reasoning_gym_rewoo/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl

# All tasks in a category
python environments/reasoning_gym_rewoo/prepare.py --category logic --size 500 --output data/train.jsonl

# All tasks from all categories
python environments/reasoning_gym_rewoo/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl

# Single task with custom config
python environments/reasoning_gym_rewoo/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
"""
# NOTE: the module docstring above doubles as the argparse epilog (see main()),
# so it is kept as a list of copy-pasteable example invocations.

import argparse
import json
import sys
from collections import Counter
from pathlib import Path
from typing import Optional

import reasoning_gym
from reasoning_gym.composite import DatasetSpec


# Task names grouped by category. Keys are the valid values of --category;
# --all-tasks concatenates every list in declaration order.
TASK_CATEGORIES = {
    "logic": [
        "knights_knaves",
        "syllogism",
        "propositional_logic",
        "zebra_puzzles",
        "aiw",
        "circuit_logic",
        "self_reference",
    ],
    "arithmetic": [
        "basic_arithmetic",
        "chain_sum",
        "leg_counting",
        "prime_factorization",
        "gcd",
        "lcm",
        "time_intervals",
        "calendar_arithmetic",
        "dice",
        "products",
        "decimal_chain_sum",
        "decimal_arithmetic",
        "count_bits",
        "bitwise_arithmetic",
        "number_format",
        "fraction_simplification",
        "power_function",
        "gsm_symbolic",
    ],
    "algebra": [
        "simple_equations",
        "polynomial_equations",
        "polynomial_multiplication",
        "simple_integration",
        "intermediate_integration",
        "complex_arithmetic",
    ],
    "algorithmic": [
        "word_sorting",
        "string_insertion",
        "sentence_reordering",
        "count_primes",
        "binary_alternation",
        "manipulate_matrix",
        "pool_matrix",
        "base_conversion",
        "jugs",
        "graph_color",
        "spiral_matrix",
        "cryptarithm",
        "ab",
        "spell_backward",
        "letter_counting",
        "letter_jumble",
        "number_sorting",
        "caesar_cipher",
        "rotate_matrix",
        "rotten_oranges",
        "palindrome_generation",
        "palindrome_partitioning",
        "game_of_life",
        "game_of_life_halting",
        "group_anagrams",
        "isomorphic_strings",
        "string_synthesis",
        "binary_matrix",
        "word_sequence_reversal",
        "number_filtering",
        "word_ladder",
        "string_manipulation",
        "ransom_note",
        "string_splitting",
    ],
    "cognition": [
        "figlet_font",
        "needle_haystack",
        "modulo_grid",
        "number_sequence",
        "rubiks_cube",
        "color_cube_rotation",
        "rectangle_count",
    ],
    "arc": [
        "arc_1d",
        "arc_agi",
        "rearc",
    ],
    "code": [
        "bf",
        "codeio",
    ],
    "games": [
        "boxnet",
        "countdown",
        "emoji_mystery",
        "futoshiki",
        "kakurasu",
        "knight_swap",
        "mahjong_puzzle",
        "maze",
        "mini_sudoku",
        "n_queens",
        "puzzle24",
        "rush_hour",
        "sokoban",
        "sudoku",
        "survo",
        "tower_of_hanoi",
        "tsumego",
    ],
    "geometry": [
        "advanced_geometry",
        "simple_geometry",
    ],
    "graphs": [
        "course_schedule",
        "family_relationships",
        "largest_island",
        "quantum_lock",
        "shortest_path",
    ],
    "induction": [
        "acre",
        "list_functions",
    ],
    "probability": [
        "coin_flip",
    ],
}


def _report_error(message: str) -> None:
    """Print a diagnostic to stderr so stdout carries only progress output."""
    print(message, file=sys.stderr)


def format_entry_to_nemo_gym(entry: dict) -> dict:
    """Wrap one dataset entry in the row layout written to the output JSONL.

    Args:
        entry: A dataset entry; it must contain a ``"question"`` key.

    Returns:
        A new dict with ``responses_create_params`` (the question as a single
        user message), followed by every key of ``entry``, followed by an
        ``agent_ref`` naming ``reasoning_gym_rewoo_agent``. Because ``entry``
        is unpacked in the middle, an ``agent_ref`` key in ``entry`` is
        overridden, while a ``responses_create_params`` key in ``entry`` wins.

    Raises:
        KeyError: If ``entry`` has no ``"question"`` key.
    """
    return {
        "responses_create_params": {
            "input": [{"role": "user", "content": entry["question"]}],
        },
        **entry,
        "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_rewoo_agent"},
    }


def _collect_entries(dataset, size: int, label: str = "") -> list[dict]:
    """Format ``dataset[0] .. dataset[size - 1]``, skipping entries that raise.

    ``label`` is appended to the per-entry error message (e.g. ``" for gcd"``).
    """
    entries = []
    for i in range(size):
        # Best effort by design: one bad sample must not abort the whole run.
        try:
            entries.append(format_entry_to_nemo_gym(dataset[i]))
        except Exception as e:
            _report_error(f"Error generating entry {i}{label}: {e}")
    return entries


def create_single_task_dataset(task_name: str, size: int, seed: int, config: Optional[dict] = None) -> list[dict]:
    """Generate ``size`` formatted entries for a single task.

    Args:
        task_name: Dataset name passed to ``reasoning_gym.create_dataset``.
        size: Number of entries to generate.
        seed: Seed forwarded to the dataset.
        config: Extra keyword arguments for the dataset. ``size`` and ``seed``
            always override same-named keys. The dict is not modified.

    Returns:
        The formatted entries. Entries that fail to generate are skipped, and
        an empty list is returned when the dataset cannot be created; errors
        are reported on stderr.
    """
    # Build a fresh kwargs dict so the caller's config is never mutated.
    dataset_kwargs = {**(config or {}), "size": size, "seed": seed}

    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
    try:
        dataset = reasoning_gym.create_dataset(task_name, **dataset_kwargs)
    except Exception as e:
        _report_error(f"Error creating dataset {task_name}: {e}")
        return []

    entries = _collect_entries(dataset, size, f" for {task_name}")

    print(f"Generated {len(entries)} entries for {task_name}")
    return entries


def create_composite_dataset(task_names: list[str], size: int, seed: int, config: Optional[dict] = None) -> list[dict]:
    """Generate ``size`` formatted entries from a composite of several tasks.

    Args:
        task_names: Names of the tasks to combine; each gets weight 1.0.
        size: Total number of entries to generate.
        seed: Seed forwarded to the composite dataset.
        config: Mapping of task name to that task's config dict. Tasks without
            an entry use an empty config.

    Returns:
        The formatted entries. Entries that fail to generate are skipped, and
        an empty list is returned when the dataset cannot be created; errors
        are reported on stderr. A per-task count is printed as a summary.
    """
    config = config or {}

    specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names]

    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
    print(f"Total samples: {size}, seed: {seed}")

    try:
        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
    except Exception as e:
        _report_error(f"Error creating composite dataset: {e}")
        return []

    entries = _collect_entries(dataset, size)

    task_counts = Counter(entry["metadata"]["source_dataset"] for entry in entries)

    print(f"\nGenerated {len(entries)} total entries")
    print("Task distribution:")
    for task, count in sorted(task_counts.items()):
        print(f"  {task}: {count}")

    return entries


def _resolve_task_names(args: argparse.Namespace) -> list[str]:
    """Translate the mutually exclusive task-selection flags into task names."""
    if args.task:
        return [args.task]
    if args.tasks:
        # Drop empty names produced by stray or trailing commas ("a,,b", "a,").
        stripped = (name.strip() for name in args.tasks.split(","))
        return [name for name in stripped if name]
    if args.category:
        # Copy so callers can never mutate the shared TASK_CATEGORIES lists.
        return list(TASK_CATEGORIES[args.category])
    if args.all_tasks:
        return [task for tasks in TASK_CATEGORIES.values() for task in tasks]
    return []


def main():
    """Parse CLI arguments, generate the requested dataset, and write it as JSONL.

    Exits with status 1 when the config is not a valid JSON object, when no
    task name can be resolved, or when no entries were generated.
    """
    parser = argparse.ArgumentParser(
        description="Create reasoning_gym datasets in NeMo Gym format",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    task_group = parser.add_mutually_exclusive_group(required=True)
    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
    task_group.add_argument(
        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
    )
    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")

    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
    parser.add_argument(
        "--config",
        type=str,
        default="{}",
        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
    )

    parser.add_argument(
        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
    )

    args = parser.parse_args()

    try:
        config = json.loads(args.config)
    except json.JSONDecodeError as e:
        _report_error(f"Error parsing config JSON: {e}")
        sys.exit(1)
    # Valid JSON that is not an object (e.g. "[1]" or "3") would otherwise
    # crash later when the config is used as a mapping.
    if not isinstance(config, dict):
        _report_error(f"Error parsing config JSON: expected a JSON object, got {type(config).__name__}")
        sys.exit(1)

    task_names = _resolve_task_names(args)
    if not task_names:
        _report_error("Error: Must specify --task, --tasks, --category, or --all-tasks")
        sys.exit(1)

    if len(task_names) == 1:
        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
    else:
        entries = create_composite_dataset(task_names, args.size, args.seed, config)

    if not entries:
        _report_error("Error: No entries generated")
        sys.exit(1)

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with output_path.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry) + "\n" for entry in entries)

    print(f"\nDataset saved to {output_path}")
    print(f"Total entries: {len(entries)}")


if __name__ == "__main__":
    main()