diff --git a/environments/reasoning_gym_claude_code/README.md b/environments/reasoning_gym_claude_code/README.md
new file mode 100644
index 000000000..9a3dc0400
--- /dev/null
+++ b/environments/reasoning_gym_claude_code/README.md
@@ -0,0 +1,50 @@
+# Reasoning Gym — Claude Code Agent
+
+Claude Code agent harness for reasoning gym tasks.
+
+Source benchmark: https://github.com/open-thought/reasoning-gym
+
+## Configuration
+
+Set Anthropic credentials in `env.yaml`:
+
+```yaml
+anthropic_api_key: sk-ant-...
+anthropic_model_name: claude-sonnet-4-6
+anthropic_base_url: null
+```
+
+For a local vLLM or Ollama endpoint that serves the Anthropic Messages API:
+
+```yaml
+anthropic_api_key: EMPTY
+anthropic_model_name: Qwen/Qwen3-4B-Instruct-2507
+anthropic_base_url: http://localhost:8000
+```
+
+`anthropic_base_url` should not include `/v1`. Claude Code appends `/v1/messages` itself.
+
+See [`responses_api_agents/claude_code_agent`](../../responses_api_agents/claude_code_agent/README.md) for the full set of agent options (`thinking`, `max_thinking_tokens`, `allowed_tools`, `disallowed_tools`, `max_turns`, `timeout`, etc.).
+
+## Quick start
+
+```bash
+ng_run "+config_paths=[environments/reasoning_gym_claude_code/config.yaml]"
+```
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_claude_code_agent \
+    +input_jsonl_fpath=environments/reasoning_gym_claude_code/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_claude_code_rollouts.jsonl
+```
+
+## Prepare training data
+
+```bash
+python environments/reasoning_gym_claude_code/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_claude_code/data/train_knights_knaves.jsonl
+```
+
+See `prepare.py` for all available tasks, categories, and config options.
+
+Alternatively, a pre-built dataset is hosted on Hugging Face at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1).
diff --git a/environments/reasoning_gym_claude_code/__init__.py b/environments/reasoning_gym_claude_code/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/reasoning_gym_claude_code/config.yaml b/environments/reasoning_gym_claude_code/config.yaml
new file mode 100644
index 000000000..44871845c
--- /dev/null
+++ b/environments/reasoning_gym_claude_code/config.yaml
@@ -0,0 +1,34 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      verified: true
+      description: Claude Code agent harness for reasoning gym tasks
+      value: Evaluate model capabilities in the Claude Code agent harness
+
+reasoning_gym_claude_code_agent:
+  responses_api_agents:
+    claude_code_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      concurrency: 32
+      model: ${anthropic_model_name}
+      anthropic_api_key: ${anthropic_api_key}
+      anthropic_base_url: ${anthropic_base_url}
+      max_turns: 30
+      timeout: 300
+      thinking: disabled
+      system_prompt: |
+        You are a precise reasoning assistant. You have access to Bash to run Python for calculations.
+        For every problem: think step by step, use code to verify when helpful, and state your final answer clearly.
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: environments/reasoning_gym_claude_code/data/example.jsonl
+      - name: train
+        type: train
+        jsonl_fpath: environments/reasoning_gym_claude_code/data/train_knights_knaves.jsonl
+        license: Apache 2.0
diff --git a/environments/reasoning_gym_claude_code/data/.gitignore b/environments/reasoning_gym_claude_code/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/reasoning_gym_claude_code/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/reasoning_gym_claude_code/data/example.jsonl b/environments/reasoning_gym_claude_code/data/example.jsonl
new file mode 100644
index 000000000..533ef47c8
--- /dev/null
+++ b/environments/reasoning_gym_claude_code/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/environments/reasoning_gym_claude_code/prepare.py b/environments/reasoning_gym_claude_code/prepare.py
new file mode 100644
index 000000000..397adeb9d
--- /dev/null
+++ b/environments/reasoning_gym_claude_code/prepare.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+# Single task with default config
+python environments/reasoning_gym_claude_code/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl
+
+# Multiple tasks (creates composite dataset with equal weights)
+python environments/reasoning_gym_claude_code/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl
+
+# All tasks in a category
+python environments/reasoning_gym_claude_code/prepare.py --category logic --size 500 --output data/train.jsonl
+
+# All tasks from all categories
+python environments/reasoning_gym_claude_code/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl
+
+# Single task with custom config
+python environments/reasoning_gym_claude_code/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import reasoning_gym
+from reasoning_gym.composite import DatasetSpec
+
+
+TASK_CATEGORIES = {  # category name (the --category choices) -> task names; --all-tasks uses every list
+    "logic": [
+        "knights_knaves",
+        "syllogism",
+        "propositional_logic",
+        "zebra_puzzles",
+        "aiw",
+        "circuit_logic",
+        "self_reference",
+    ],
+    "arithmetic": [
+        "basic_arithmetic",
+        "chain_sum",
+        "leg_counting",
+        "prime_factorization",
+        "gcd",
+        "lcm",
+        "time_intervals",
+        "calendar_arithmetic",
+        "dice",
+        "products",
+        "decimal_chain_sum",
+        "decimal_arithmetic",
+        "count_bits",
+        "bitwise_arithmetic",
+        "number_format",
+        "fraction_simplification",
+        "power_function",
+        "gsm_symbolic",
+    ],
+    "algebra": [
+        "simple_equations",
+        "polynomial_equations",
+        "polynomial_multiplication",
+        "simple_integration",
+        "intermediate_integration",
+        "complex_arithmetic",
+    ],
+    "algorithmic": [
+        "word_sorting",
+        "string_insertion",
+        "sentence_reordering",
+        "count_primes",
+        "binary_alternation",
+        "manipulate_matrix",
+        "pool_matrix",
+        "base_conversion",
+        "jugs",
+        "graph_color",
+        "spiral_matrix",
+        "cryptarithm",
+        "ab",
+        "spell_backward",
+        "letter_counting",
+        "letter_jumble",
+        "number_sorting",
+        "caesar_cipher",
+        "rotate_matrix",
+        "rotten_oranges",
+        "palindrome_generation",
+        "palindrome_partitioning",
+        "game_of_life",
+        "game_of_life_halting",
+        "group_anagrams",
+        "isomorphic_strings",
+        "string_synthesis",
+        "binary_matrix",
+        "word_sequence_reversal",
+        "number_filtering",
+        "word_ladder",
+        "string_manipulation",
+        "ransom_note",
+        "string_splitting",
+    ],
+    "cognition": [
+        "figlet_font",
+        "needle_haystack",
+        "modulo_grid",
+        "number_sequence",
+        "rubiks_cube",
+        "color_cube_rotation",
+        "rectangle_count",
+    ],
+    "arc": [
+        "arc_1d",
+        "arc_agi",
+        "rearc",
+    ],
+    "code": [
+        "bf",
+        "codeio",
+    ],
+    "games": [
+        "boxnet",
+        "countdown",
+        "emoji_mystery",
+        "futoshiki",
+        "kakurasu",
+        "knight_swap",
+        "mahjong_puzzle",
+        "maze",
+        "mini_sudoku",
+        "n_queens",
+        "puzzle24",
+        "rush_hour",
+        "sokoban",
+        "sudoku",
+        "survo",
+        "tower_of_hanoi",
+        "tsumego",
+    ],
+    "geometry": [
+        "advanced_geometry",
+        "simple_geometry",
+    ],
+    "graphs": [
+        "course_schedule",
+        "family_relationships",
+        "largest_island",
+        "quantum_lock",
+        "shortest_path",
+    ],
+    "induction": [
+        "acre",
+        "list_functions",
+    ],
+    "probability": [
+        "coin_flip",
+    ],
+}
+
+
+def format_entry_to_nemo_gym(entry: dict) -> dict:
+    """Return a NeMo Gym row: the question as a user message, the entry's own fields, and the agent ref."""
+    # Built step by step in the same key order and with the same override precedence as a single
+    # dict literal: request params first, then every key of `entry`, then "agent_ref" last.
+    row = {"responses_create_params": {"input": [{"role": "user", "content": entry["question"]}]}}
+    row.update(entry)
+    row["agent_ref"] = {"type": "responses_api_agents", "name": "reasoning_gym_claude_code_agent"}
+    return row
+
+
+def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate up to `size` entries of one reasoning_gym task in NeMo Gym format.
+
+    `config` holds extra keyword arguments for `reasoning_gym.create_dataset`; it is copied rather
+    than modified, and `size`/`seed` always override same-named keys in it. Returns the formatted
+    entries: entries that fail to generate are skipped, and the list is empty if the dataset itself
+    cannot be created (both cases are reported on stdout).
+    """
+    dataset_kwargs = {**(config or {}), "size": size, "seed": seed}  # fresh dict: never mutate the caller's
+    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
+    try:
+        dataset = reasoning_gym.create_dataset(task_name, **dataset_kwargs)
+    except Exception as e:  # best-effort: report and let the caller deal with the empty result
+        print(f"Error creating dataset {task_name}: {e}")
+        return []
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:  # skip only the failing entry and keep going
+            print(f"Error generating entry {i} for {task_name}: {e}")
+    print(f"Generated {len(entries)} entries for {task_name}")
+    return entries
+
+
+def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate `size` entries from an equally weighted mix of `task_names`, in NeMo Gym format.
+
+    `config` maps a task name to that task's config dict (tasks without a key get `{}`). Entries that
+    fail to generate are skipped; `[]` is returned if the composite dataset cannot be created.
+    """
+    per_task_config = config or {}
+    specs = [DatasetSpec(name=task, weight=1.0, config=per_task_config.get(task, {})) for task in task_names]
+    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
+    print(f"Total samples: {size}, seed: {seed}")
+    try:
+        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
+    except Exception as e:
+        print(f"Error creating composite dataset: {e}")
+        return []
+
+    entries = []
+    for index in range(size):
+        try:
+            formatted = format_entry_to_nemo_gym(dataset[index])
+        except Exception as e:
+            print(f"Error generating entry {index}: {e}")
+        else:
+            entries.append(formatted)
+
+    counts_by_task = {}  # number of generated entries per originating task
+    for formatted in entries:
+        source = formatted["metadata"]["source_dataset"]
+        counts_by_task[source] = 1 + counts_by_task.get(source, 0)
+    print(f"\nGenerated {len(entries)} total entries")
+    print("Task distribution:")
+    for task, count in sorted(counts_by_task.items()):
+        print(f"  {task}: {count}")
+    return entries
+
+
+def main():
+    """Parse CLI arguments, generate the requested dataset, and write it to `--output` as JSONL.
+
+    Exits with status 1 on an invalid `--config`, an empty task selection, or zero generated entries.
+    """
+    parser = argparse.ArgumentParser(
+        description="Create reasoning_gym datasets in NeMo Gym format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+    task_group = parser.add_mutually_exclusive_group(required=True)
+    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
+    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
+    task_group.add_argument(
+        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
+    )
+    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")
+    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="{}",
+        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
+    )
+    parser.add_argument(
+        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
+    )
+    args = parser.parse_args()
+
+    try:
+        config = json.loads(args.config) or {}  # falsy JSON (null, {}, ...) means "no config"
+    except json.JSONDecodeError as e:
+        print(f"Error parsing config JSON: {e}")
+        sys.exit(1)
+    # Both dataset builders treat the config as a dict, so reject valid JSON of any other type.
+    if not isinstance(config, dict):
+        print(f"Error parsing config JSON: expected an object, got {type(config).__name__}")
+        sys.exit(1)
+
+    if args.task:
+        task_names = [args.task]
+    elif args.tasks:  # drop empty names left by stray commas or whitespace, e.g. "a,b,"
+        task_names = [name for name in (t.strip() for t in args.tasks.split(",")) if name]
+    elif args.category:
+        task_names = list(TASK_CATEGORIES[args.category])  # copy: never alias the module constant
+    elif args.all_tasks:
+        task_names = [name for names in TASK_CATEGORIES.values() for name in names]
+    else:
+        task_names = []
+    if not task_names:
+        print("Error: Must specify --task, --tasks, --category, or --all-tasks")
+        sys.exit(1)
+
+    # A lone task takes `config` as its own kwargs; several tasks take it keyed by task name.
+    if len(task_names) == 1:
+        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
+    else:
+        entries = create_composite_dataset(task_names, args.size, args.seed, config)
+    if not entries:
+        print("Error: No entries generated")
+        sys.exit(1)
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.writelines(json.dumps(entry) + "\n" for entry in entries)
+    print(f"\nDataset saved to {output_path}")
+    print(f"Total entries: {len(entries)}")
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
diff --git a/environments/reasoning_gym_hermes/README.md b/environments/reasoning_gym_hermes/README.md
new file mode 100644
index 000000000..49ce6a23e
--- /dev/null
+++ b/environments/reasoning_gym_hermes/README.md
@@ -0,0 +1,28 @@
+# Reasoning Gym — Hermes Agent
+
+Hermes Agent harness for reasoning_gym tasks, with the `terminal`, `file`, `code_execution`, `skills`, and `todo` toolsets enabled.
+
+Source benchmark: https://github.com/open-thought/reasoning-gym
+
+## Quick start
+
+```bash
+ng_run "+config_paths=[environments/reasoning_gym_hermes/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_hermes_agent \
+    +input_jsonl_fpath=environments/reasoning_gym_hermes/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_hermes_rollouts.jsonl
+```
+
+## Prepare training data
+
+```bash
+python environments/reasoning_gym_hermes/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_hermes/data/train_knights_knaves.jsonl
+```
+
+See `prepare.py` for all available tasks, categories, and config options.
+
+Alternatively, a pre-built dataset is hosted on Hugging Face at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1).
diff --git a/environments/reasoning_gym_hermes/__init__.py b/environments/reasoning_gym_hermes/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/reasoning_gym_hermes/config.yaml b/environments/reasoning_gym_hermes/config.yaml
new file mode 100644
index 000000000..e0d8447de
--- /dev/null
+++ b/environments/reasoning_gym_hermes/config.yaml
@@ -0,0 +1,35 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      verified: false
+      description: Hermes Agent with terminal, file, code_execution, skills, todo toolsets on reasoning_gym tasks
+      value: Improve model reasoning capabilities in hermes agent harness
+
+reasoning_gym_hermes_agent:
+  responses_api_agents:
+    hermes_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      enabled_toolsets: [terminal, file, code_execution, skills, todo]
+      max_turns: 30
+      concurrency: 32
+      terminal_backend: local
+      terminal_timeout: 30
+      system_prompt: |
+        You are a precise reasoning assistant. You have access to a terminal tool to run Python for calculations or verification.
+        Use it when computation would help. State your final answer clearly.
+      datasets:
+        - name: example
+          type: example
+          jsonl_fpath: environments/reasoning_gym_hermes/data/example.jsonl
+        - name: train
+          type: train
+          jsonl_fpath: environments/reasoning_gym_hermes/data/train_knights_knaves.jsonl
+          license: Apache 2.0
diff --git a/environments/reasoning_gym_hermes/data/.gitignore b/environments/reasoning_gym_hermes/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/reasoning_gym_hermes/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/reasoning_gym_hermes/data/example.jsonl b/environments/reasoning_gym_hermes/data/example.jsonl
new file mode 100644
index 000000000..533ef47c8
--- /dev/null
+++ b/environments/reasoning_gym_hermes/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/environments/reasoning_gym_hermes/prepare.py b/environments/reasoning_gym_hermes/prepare.py
new file mode 100644
index 000000000..1d236e1f4
--- /dev/null
+++ b/environments/reasoning_gym_hermes/prepare.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+# Single task with default config
+python environments/reasoning_gym_hermes/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl
+
+# Multiple tasks (creates composite dataset with equal weights)
+python environments/reasoning_gym_hermes/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl
+
+# All tasks in a category
+python environments/reasoning_gym_hermes/prepare.py --category logic --size 500 --output data/train.jsonl
+
+# All tasks from all categories
+python environments/reasoning_gym_hermes/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl
+
+# Single task with custom config
+python environments/reasoning_gym_hermes/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import reasoning_gym
+from reasoning_gym.composite import DatasetSpec
+
+
+TASK_CATEGORIES = {  # category name -> reasoning_gym task names; keys are the valid --category values
+    "logic": [
+        "knights_knaves",
+        "syllogism",
+        "propositional_logic",
+        "zebra_puzzles",
+        "aiw",
+        "circuit_logic",
+        "self_reference",
+    ],
+    "arithmetic": [
+        "basic_arithmetic",
+        "chain_sum",
+        "leg_counting",
+        "prime_factorization",
+        "gcd",
+        "lcm",
+        "time_intervals",
+        "calendar_arithmetic",
+        "dice",
+        "products",
+        "decimal_chain_sum",
+        "decimal_arithmetic",
+        "count_bits",
+        "bitwise_arithmetic",
+        "number_format",
+        "fraction_simplification",
+        "power_function",
+        "gsm_symbolic",
+    ],
+    "algebra": [
+        "simple_equations",
+        "polynomial_equations",
+        "polynomial_multiplication",
+        "simple_integration",
+        "intermediate_integration",
+        "complex_arithmetic",
+    ],
+    "algorithmic": [
+        "word_sorting",
+        "string_insertion",
+        "sentence_reordering",
+        "count_primes",
+        "binary_alternation",
+        "manipulate_matrix",
+        "pool_matrix",
+        "base_conversion",
+        "jugs",
+        "graph_color",
+        "spiral_matrix",
+        "cryptarithm",
+        "ab",
+        "spell_backward",
+        "letter_counting",
+        "letter_jumble",
+        "number_sorting",
+        "caesar_cipher",
+        "rotate_matrix",
+        "rotten_oranges",
+        "palindrome_generation",
+        "palindrome_partitioning",
+        "game_of_life",
+        "game_of_life_halting",
+        "group_anagrams",
+        "isomorphic_strings",
+        "string_synthesis",
+        "binary_matrix",
+        "word_sequence_reversal",
+        "number_filtering",
+        "word_ladder",
+        "string_manipulation",
+        "ransom_note",
+        "string_splitting",
+    ],
+    "cognition": [
+        "figlet_font",
+        "needle_haystack",
+        "modulo_grid",
+        "number_sequence",
+        "rubiks_cube",
+        "color_cube_rotation",
+        "rectangle_count",
+    ],
+    "arc": [
+        "arc_1d",
+        "arc_agi",
+        "rearc",
+    ],
+    "code": [
+        "bf",
+        "codeio",
+    ],
+    "games": [
+        "boxnet",
+        "countdown",
+        "emoji_mystery",
+        "futoshiki",
+        "kakurasu",
+        "knight_swap",
+        "mahjong_puzzle",
+        "maze",
+        "mini_sudoku",
+        "n_queens",
+        "puzzle24",
+        "rush_hour",
+        "sokoban",
+        "sudoku",
+        "survo",
+        "tower_of_hanoi",
+        "tsumego",
+    ],
+    "geometry": [
+        "advanced_geometry",
+        "simple_geometry",
+    ],
+    "graphs": [
+        "course_schedule",
+        "family_relationships",
+        "largest_island",
+        "quantum_lock",
+        "shortest_path",
+    ],
+    "induction": [
+        "acre",
+        "list_functions",
+    ],
+    "probability": [
+        "coin_flip",
+    ],
+}
+
+
+def format_entry_to_nemo_gym(entry: dict) -> dict:
+    """Wrap a reasoning_gym entry as a NeMo Gym row: prompt, then the entry's own fields, then ``agent_ref``."""
+    user_turn = {"role": "user", "content": entry["question"]}
+    row = {"responses_create_params": {"input": [user_turn]}}
+    # Later writes win: entry fields may replace the prompt, and agent_ref always replaces an entry's own.
+    row.update(entry)
+    row["agent_ref"] = {"type": "responses_api_agents", "name": "reasoning_gym_hermes_agent"}
+    return row
+
+
+def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate ``size`` NeMo Gym rows for one task; returns ``[]`` if the dataset cannot be created.
+
+    ``config`` supplies extra ``create_dataset`` keyword arguments (``size``/``seed`` win); it is never modified.
+    """
+    dataset_kwargs = {**(config or {}), "size": size, "seed": seed}
+
+    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
+    try:
+        dataset = reasoning_gym.create_dataset(task_name, **dataset_kwargs)
+    except Exception as e:
+        print(f"Error creating dataset {task_name}: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:
+            print(f"Error generating entry {i} for {task_name}: {e}")
+
+    print(f"Generated {len(entries)} entries for {task_name}")
+    return entries
+
+
+def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate ``size`` NeMo Gym rows from a composite of ``task_names``, every task weighted 1.0.
+
+    ``config`` maps a task name to that task's config dict; tasks without an entry get ``{}``.
+    Prints the per-task row counts and returns ``[]`` if the composite dataset cannot be created.
+    """
+    per_task_config = config or {}
+    specs = [DatasetSpec(name=name, weight=1.0, config=per_task_config.get(name, {})) for name in task_names]
+
+    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
+    print(f"Total samples: {size}, seed: {seed}")
+    try:
+        composite = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
+    except Exception as e:
+        print(f"Error creating composite dataset: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(composite[i]))
+        except Exception as e:
+            print(f"Error generating entry {i}: {e}")
+
+    tally = {}  # rows per originating task
+    for row in entries:
+        task = row["metadata"]["source_dataset"]
+        tally[task] = tally.get(task, 0) + 1
+
+    print(f"\nGenerated {len(entries)} total entries")
+    print("Task distribution:")
+    for task, count in sorted(tally.items()):
+        print(f"  {task}: {count}")
+    return entries
+
+
+def main():
+    """Parse the CLI, generate the requested dataset and write it to ``--output`` as JSONL (exit 1 on failure)."""
+    parser = argparse.ArgumentParser(
+        description="Create reasoning_gym datasets in NeMo Gym format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+
+    task_group = parser.add_mutually_exclusive_group(required=True)
+    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
+    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
+    task_group.add_argument(
+        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
+    )
+    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")
+
+    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="{}",
+        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
+    )
+
+    parser.add_argument(
+        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
+    )
+
+    args = parser.parse_args()
+
+    try:
+        config = json.loads(args.config)
+        if not isinstance(config, dict):
+            raise ValueError("expected a JSON object")
+    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
+        sys.exit(f"Error parsing config JSON: {e}")  # message goes to stderr, exit status 1
+
+    if args.task:
+        task_names = [args.task]
+    elif args.tasks:
+        # Drop blanks so "a,,b" or a trailing comma cannot produce an empty task name.
+        task_names = [name for name in (part.strip() for part in args.tasks.split(",")) if name]
+    elif args.category:
+        task_names = TASK_CATEGORIES[args.category]
+    elif args.all_tasks:
+        task_names = [name for names in TASK_CATEGORIES.values() for name in names]
+    else:
+        task_names = []
+    if not task_names:
+        sys.exit("Error: Must specify --task, --tasks, --category, or --all-tasks")
+
+    if len(task_names) == 1:
+        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
+    else:
+        entries = create_composite_dataset(task_names, args.size, args.seed, config)
+
+    if not entries:
+        sys.exit("Error: No entries generated")
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        for entry in entries:
+            f.write(json.dumps(entry) + "\n")
+
+    print(f"\nDataset saved to {output_path}")
+    print(f"Total entries: {len(entries)}")
+
+
+if __name__ == "__main__":  # run the CLI only when this file is executed as a script
+    main()
diff --git a/environments/reasoning_gym_orchestrator/README.md b/environments/reasoning_gym_orchestrator/README.md
new file mode 100644
index 000000000..3220c438d
--- /dev/null
+++ b/environments/reasoning_gym_orchestrator/README.md
@@ -0,0 +1,28 @@
+# Reasoning Gym — LangGraph Orchestrator Agent
+
+A LangGraph orchestrator agent for resource servers that do not use tools. It enables diverse agent training data and test-time scaling compared with a simple agent, and it is extensible to tools or other agent architectures.
+
+Source benchmark: https://github.com/open-thought/reasoning-gym
+
+## Quick start
+
+```bash
+ng_run "+config_paths=[environments/reasoning_gym_orchestrator/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_orchestrator_agent \
+    +input_jsonl_fpath=environments/reasoning_gym_orchestrator/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_orchestrator_rollouts.jsonl
+```
+
+## Prepare training data
+
+```bash
+python environments/reasoning_gym_orchestrator/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_orchestrator/data/train_knights_knaves.jsonl
+```
+
+See `prepare.py` for all available tasks, categories, and config options.
+
+Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1).
diff --git a/environments/reasoning_gym_orchestrator/__init__.py b/environments/reasoning_gym_orchestrator/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/reasoning_gym_orchestrator/config.yaml b/environments/reasoning_gym_orchestrator/config.yaml
new file mode 100644
index 000000000..f01db3f9d
--- /dev/null
+++ b/environments/reasoning_gym_orchestrator/config.yaml
@@ -0,0 +1,27 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      description: LangGraph orchestrator agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures
+      value: Iterative test time scaling for improved performance in reasoning tasks
+      verified: false
+
+reasoning_gym_orchestrator_agent:
+  responses_api_agents:
+    langgraph_agent:
+      entrypoint: orchestrator_agent.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: environments/reasoning_gym_orchestrator/data/example.jsonl
+      - name: train
+        type: train
+        jsonl_fpath: environments/reasoning_gym_orchestrator/data/train_knights_knaves.jsonl
+        license: Apache 2.0
diff --git a/environments/reasoning_gym_orchestrator/data/.gitignore b/environments/reasoning_gym_orchestrator/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/reasoning_gym_orchestrator/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/reasoning_gym_orchestrator/data/example.jsonl b/environments/reasoning_gym_orchestrator/data/example.jsonl
new file mode 100644
index 000000000..533ef47c8
--- /dev/null
+++ b/environments/reasoning_gym_orchestrator/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/environments/reasoning_gym_orchestrator/prepare.py b/environments/reasoning_gym_orchestrator/prepare.py
new file mode 100644
index 000000000..fdc71a1f1
--- /dev/null
+++ b/environments/reasoning_gym_orchestrator/prepare.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+# Single task with default config
+python environments/reasoning_gym_orchestrator/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl
+
+# Multiple tasks (creates composite dataset with equal weights)
+python environments/reasoning_gym_orchestrator/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl
+
+# All tasks in a category
+python environments/reasoning_gym_orchestrator/prepare.py --category logic --size 500 --output data/train.jsonl
+
+# All tasks from all categories
+python environments/reasoning_gym_orchestrator/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl
+
+# Single task with custom config
+python environments/reasoning_gym_orchestrator/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import reasoning_gym
+from reasoning_gym.composite import DatasetSpec
+
+
+TASK_CATEGORIES = {  # category name -> reasoning_gym task names; keys are the valid --category values
+    "logic": [
+        "knights_knaves",
+        "syllogism",
+        "propositional_logic",
+        "zebra_puzzles",
+        "aiw",
+        "circuit_logic",
+        "self_reference",
+    ],
+    "arithmetic": [
+        "basic_arithmetic",
+        "chain_sum",
+        "leg_counting",
+        "prime_factorization",
+        "gcd",
+        "lcm",
+        "time_intervals",
+        "calendar_arithmetic",
+        "dice",
+        "products",
+        "decimal_chain_sum",
+        "decimal_arithmetic",
+        "count_bits",
+        "bitwise_arithmetic",
+        "number_format",
+        "fraction_simplification",
+        "power_function",
+        "gsm_symbolic",
+    ],
+    "algebra": [
+        "simple_equations",
+        "polynomial_equations",
+        "polynomial_multiplication",
+        "simple_integration",
+        "intermediate_integration",
+        "complex_arithmetic",
+    ],
+    "algorithmic": [
+        "word_sorting",
+        "string_insertion",
+        "sentence_reordering",
+        "count_primes",
+        "binary_alternation",
+        "manipulate_matrix",
+        "pool_matrix",
+        "base_conversion",
+        "jugs",
+        "graph_color",
+        "spiral_matrix",
+        "cryptarithm",
+        "ab",
+        "spell_backward",
+        "letter_counting",
+        "letter_jumble",
+        "number_sorting",
+        "caesar_cipher",
+        "rotate_matrix",
+        "rotten_oranges",
+        "palindrome_generation",
+        "palindrome_partitioning",
+        "game_of_life",
+        "game_of_life_halting",
+        "group_anagrams",
+        "isomorphic_strings",
+        "string_synthesis",
+        "binary_matrix",
+        "word_sequence_reversal",
+        "number_filtering",
+        "word_ladder",
+        "string_manipulation",
+        "ransom_note",
+        "string_splitting",
+    ],
+    "cognition": [
+        "figlet_font",
+        "needle_haystack",
+        "modulo_grid",
+        "number_sequence",
+        "rubiks_cube",
+        "color_cube_rotation",
+        "rectangle_count",
+    ],
+    "arc": [
+        "arc_1d",
+        "arc_agi",
+        "rearc",
+    ],
+    "code": [
+        "bf",
+        "codeio",
+    ],
+    "games": [
+        "boxnet",
+        "countdown",
+        "emoji_mystery",
+        "futoshiki",
+        "kakurasu",
+        "knight_swap",
+        "mahjong_puzzle",
+        "maze",
+        "mini_sudoku",
+        "n_queens",
+        "puzzle24",
+        "rush_hour",
+        "sokoban",
+        "sudoku",
+        "survo",
+        "tower_of_hanoi",
+        "tsumego",
+    ],
+    "geometry": [
+        "advanced_geometry",
+        "simple_geometry",
+    ],
+    "graphs": [
+        "course_schedule",
+        "family_relationships",
+        "largest_island",
+        "quantum_lock",
+        "shortest_path",
+    ],
+    "induction": [
+        "acre",
+        "list_functions",
+    ],
+    "probability": [
+        "coin_flip",
+    ],
+}
+
+
+def format_entry_to_nemo_gym(entry: dict) -> dict:
+    """Wrap a reasoning_gym entry as a NeMo Gym row: prompt, then the entry's own fields, then ``agent_ref``."""
+    user_turn = {"role": "user", "content": entry["question"]}
+    row = {"responses_create_params": {"input": [user_turn]}}
+    # Later writes win: entry fields may replace the prompt, and agent_ref always replaces an entry's own.
+    row.update(entry)
+    row["agent_ref"] = {"type": "responses_api_agents", "name": "reasoning_gym_orchestrator_agent"}
+    return row
+
+
+def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate ``size`` NeMo Gym rows for one task; returns ``[]`` if the dataset cannot be created.
+
+    ``config`` supplies extra ``create_dataset`` keyword arguments (``size``/``seed`` win); it is never modified.
+    """
+    dataset_kwargs = {**(config or {}), "size": size, "seed": seed}
+
+    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
+    try:
+        dataset = reasoning_gym.create_dataset(task_name, **dataset_kwargs)
+    except Exception as e:
+        print(f"Error creating dataset {task_name}: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:
+            print(f"Error generating entry {i} for {task_name}: {e}")
+
+    print(f"Generated {len(entries)} entries for {task_name}")
+    return entries
+
+
+def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate ``size`` NeMo Gym rows from a composite of ``task_names``, every task weighted 1.0.
+
+    ``config`` maps a task name to that task's config dict; tasks without an entry get ``{}``.
+    Prints the per-task row counts and returns ``[]`` if the composite dataset cannot be created.
+    """
+    per_task_config = config or {}
+    specs = [DatasetSpec(name=name, weight=1.0, config=per_task_config.get(name, {})) for name in task_names]
+
+    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
+    print(f"Total samples: {size}, seed: {seed}")
+    try:
+        composite = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
+    except Exception as e:
+        print(f"Error creating composite dataset: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(composite[i]))
+        except Exception as e:
+            print(f"Error generating entry {i}: {e}")
+
+    tally = {}  # rows per originating task
+    for row in entries:
+        task = row["metadata"]["source_dataset"]
+        tally[task] = tally.get(task, 0) + 1
+
+    print(f"\nGenerated {len(entries)} total entries")
+    print("Task distribution:")
+    for task, count in sorted(tally.items()):
+        print(f"  {task}: {count}")
+    return entries
+
+
+def main():
+    """Parse the CLI, generate the requested dataset and write it to ``--output`` as JSONL (exit 1 on failure)."""
+    parser = argparse.ArgumentParser(
+        description="Create reasoning_gym datasets in NeMo Gym format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+
+    task_group = parser.add_mutually_exclusive_group(required=True)
+    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
+    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
+    task_group.add_argument(
+        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
+    )
+    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")
+
+    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="{}",
+        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
+    )
+
+    parser.add_argument(
+        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
+    )
+
+    args = parser.parse_args()
+
+    try:
+        config = json.loads(args.config)
+        if not isinstance(config, dict):
+            raise ValueError("expected a JSON object")
+    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
+        sys.exit(f"Error parsing config JSON: {e}")  # message goes to stderr, exit status 1
+
+    if args.task:
+        task_names = [args.task]
+    elif args.tasks:
+        # Drop blanks so "a,,b" or a trailing comma cannot produce an empty task name.
+        task_names = [name for name in (part.strip() for part in args.tasks.split(",")) if name]
+    elif args.category:
+        task_names = TASK_CATEGORIES[args.category]
+    elif args.all_tasks:
+        task_names = [name for names in TASK_CATEGORIES.values() for name in names]
+    else:
+        task_names = []
+    if not task_names:
+        sys.exit("Error: Must specify --task, --tasks, --category, or --all-tasks")
+
+    if len(task_names) == 1:
+        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
+    else:
+        entries = create_composite_dataset(task_names, args.size, args.seed, config)
+
+    if not entries:
+        sys.exit("Error: No entries generated")
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        for entry in entries:
+            f.write(json.dumps(entry) + "\n")
+
+    print(f"\nDataset saved to {output_path}")
+    print(f"Total entries: {len(entries)}")
+
+
+if __name__ == "__main__":  # run the CLI only when this file is executed as a script
+    main()
diff --git a/environments/reasoning_gym_parallel_thinking/README.md b/environments/reasoning_gym_parallel_thinking/README.md
new file mode 100644
index 000000000..a209b039e
--- /dev/null
+++ b/environments/reasoning_gym_parallel_thinking/README.md
@@ -0,0 +1,28 @@
+# Reasoning Gym — LangGraph Parallel Thinking Agent
+
+A LangGraph parallel thinking agent for resource servers that do not use tools. It enables diverse agent training data and test-time scaling compared with a simple agent, and it is extensible to tools or other agent architectures.
+
+Source benchmark: https://github.com/open-thought/reasoning-gym
+
+## Quick start
+
+```bash
+ng_run "+config_paths=[environments/reasoning_gym_parallel_thinking/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_parallel_thinking_agent \
+    +input_jsonl_fpath=environments/reasoning_gym_parallel_thinking/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_parallel_thinking_rollouts.jsonl
+```
+
+## Prepare training data
+
+```bash
+python environments/reasoning_gym_parallel_thinking/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_parallel_thinking/data/train_knights_knaves.jsonl
+```
+
+See `prepare.py` for all available tasks, categories, and config options.
+
+Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1).
diff --git a/environments/reasoning_gym_parallel_thinking/__init__.py b/environments/reasoning_gym_parallel_thinking/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/reasoning_gym_parallel_thinking/config.yaml b/environments/reasoning_gym_parallel_thinking/config.yaml
new file mode 100644
index 000000000..fc6f0bb78
--- /dev/null
+++ b/environments/reasoning_gym_parallel_thinking/config.yaml
@@ -0,0 +1,27 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      description: LangGraph parallel thinking agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures
+      value: Iterative test time scaling for improved performance in reasoning tasks
+      verified: false
+
+reasoning_gym_parallel_thinking_agent:
+  responses_api_agents:
+    langgraph_agent:
+      entrypoint: parallel_thinking_agent.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: environments/reasoning_gym_parallel_thinking/data/example.jsonl
+      - name: train
+        type: train
+        jsonl_fpath: environments/reasoning_gym_parallel_thinking/data/train_knights_knaves.jsonl
+        license: Apache 2.0
diff --git a/environments/reasoning_gym_parallel_thinking/data/.gitignore b/environments/reasoning_gym_parallel_thinking/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/reasoning_gym_parallel_thinking/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/reasoning_gym_parallel_thinking/data/example.jsonl b/environments/reasoning_gym_parallel_thinking/data/example.jsonl
new file mode 100644
index 000000000..533ef47c8
--- /dev/null
+++ b/environments/reasoning_gym_parallel_thinking/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/environments/reasoning_gym_parallel_thinking/prepare.py b/environments/reasoning_gym_parallel_thinking/prepare.py
new file mode 100644
index 000000000..f716d9ef7
--- /dev/null
+++ b/environments/reasoning_gym_parallel_thinking/prepare.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+# Single task with default config
+python environments/reasoning_gym_parallel_thinking/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl
+
+# Multiple tasks (creates composite dataset with equal weights)
+python environments/reasoning_gym_parallel_thinking/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl
+
+# All tasks in a category
+python environments/reasoning_gym_parallel_thinking/prepare.py --category logic --size 500 --output data/train.jsonl
+
+# All tasks from all categories
+python environments/reasoning_gym_parallel_thinking/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl
+
+# Single task with custom config
+python environments/reasoning_gym_parallel_thinking/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import reasoning_gym
+from reasoning_gym.composite import DatasetSpec
+
+
+TASK_CATEGORIES = {  # category name -> task names; read by main() for --category and --all-tasks
+    "logic": [
+        "knights_knaves",
+        "syllogism",
+        "propositional_logic",
+        "zebra_puzzles",
+        "aiw",
+        "circuit_logic",
+        "self_reference",
+    ],
+    "arithmetic": [
+        "basic_arithmetic",
+        "chain_sum",
+        "leg_counting",
+        "prime_factorization",
+        "gcd",
+        "lcm",
+        "time_intervals",
+        "calendar_arithmetic",
+        "dice",
+        "products",
+        "decimal_chain_sum",
+        "decimal_arithmetic",
+        "count_bits",
+        "bitwise_arithmetic",
+        "number_format",
+        "fraction_simplification",
+        "power_function",
+        "gsm_symbolic",
+    ],
+    "algebra": [
+        "simple_equations",
+        "polynomial_equations",
+        "polynomial_multiplication",
+        "simple_integration",
+        "intermediate_integration",
+        "complex_arithmetic",
+    ],
+    "algorithmic": [
+        "word_sorting",
+        "string_insertion",
+        "sentence_reordering",
+        "count_primes",
+        "binary_alternation",
+        "manipulate_matrix",
+        "pool_matrix",
+        "base_conversion",
+        "jugs",
+        "graph_color",
+        "spiral_matrix",
+        "cryptarithm",
+        "ab",
+        "spell_backward",
+        "letter_counting",
+        "letter_jumble",
+        "number_sorting",
+        "caesar_cipher",
+        "rotate_matrix",
+        "rotten_oranges",
+        "palindrome_generation",
+        "palindrome_partitioning",
+        "game_of_life",
+        "game_of_life_halting",
+        "group_anagrams",
+        "isomorphic_strings",
+        "string_synthesis",
+        "binary_matrix",
+        "word_sequence_reversal",
+        "number_filtering",
+        "word_ladder",
+        "string_manipulation",
+        "ransom_note",
+        "string_splitting",
+    ],
+    "cognition": [
+        "figlet_font",
+        "needle_haystack",
+        "modulo_grid",
+        "number_sequence",
+        "rubiks_cube",
+        "color_cube_rotation",
+        "rectangle_count",
+    ],
+    "arc": [
+        "arc_1d",
+        "arc_agi",
+        "rearc",
+    ],
+    "code": [
+        "bf",
+        "codeio",
+    ],
+    "games": [
+        "boxnet",
+        "countdown",
+        "emoji_mystery",
+        "futoshiki",
+        "kakurasu",
+        "knight_swap",
+        "mahjong_puzzle",
+        "maze",
+        "mini_sudoku",
+        "n_queens",
+        "puzzle24",
+        "rush_hour",
+        "sokoban",
+        "sudoku",
+        "survo",
+        "tower_of_hanoi",
+        "tsumego",
+    ],
+    "geometry": [
+        "advanced_geometry",
+        "simple_geometry",
+    ],
+    "graphs": [
+        "course_schedule",
+        "family_relationships",
+        "largest_island",
+        "quantum_lock",
+        "shortest_path",
+    ],
+    "induction": [
+        "acre",
+        "list_functions",
+    ],
+    "probability": [
+        "coin_flip",
+    ],
+}
+
+
+def format_entry_to_nemo_gym(entry: dict) -> dict:
+    return {
+        "responses_create_params": {
+            "input": [{"role": "user", "content": entry["question"]}],
+        },
+        **entry,
+        "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_parallel_thinking_agent"},
+    }
+
+
+def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
+    config = config or {}
+    config["size"] = size
+    config["seed"] = seed
+
+    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
+    try:
+        dataset = reasoning_gym.create_dataset(task_name, **config)
+    except Exception as e:
+        print(f"Error creating dataset {task_name}: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entry = dataset[i]
+            entries.append(format_entry_to_nemo_gym(entry))
+        except Exception as e:
+            print(f"Error generating entry {i} for {task_name}: {e}")
+            continue
+
+    print(f"Generated {len(entries)} entries for {task_name}")
+    return entries
+
+
+def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
+    config = config or {}
+
+    specs = [DatasetSpec(name=name, weight=1.0, config=config.get(name, {})) for name in task_names]
+
+    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
+    print(f"Total samples: {size}, seed: {seed}")
+
+    try:
+        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
+    except Exception as e:
+        print(f"Error creating composite dataset: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entry = dataset[i]
+            entries.append(format_entry_to_nemo_gym(entry))
+        except Exception as e:
+            print(f"Error generating entry {i}: {e}")
+            continue
+
+    task_counts = {}
+    for entry in entries:
+        task = entry["metadata"]["source_dataset"]
+        task_counts[task] = task_counts.get(task, 0) + 1
+
+    print(f"\nGenerated {len(entries)} total entries")
+    print("Task distribution:")
+    for task, count in sorted(task_counts.items()):
+        print(f"  {task}: {count}")
+
+    return entries
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Create reasoning_gym datasets in NeMo Gym format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+
+    task_group = parser.add_mutually_exclusive_group(required=True)
+    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
+    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
+    task_group.add_argument(
+        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
+    )
+    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")
+
+    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="{}",
+        help="Task-specific config as JSON string (e.g., '{\"n_people\": 3}')",
+    )
+
+    parser.add_argument(
+        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
+    )
+
+    args = parser.parse_args()
+
+    try:
+        config = json.loads(args.config)
+    except json.JSONDecodeError as e:
+        print(f"Error parsing config JSON: {e}")
+        sys.exit(1)
+
+    if args.task:
+        task_names = [args.task]
+    elif args.tasks:
+        task_names = [t.strip() for t in args.tasks.split(",")]
+    elif args.category:
+        task_names = TASK_CATEGORIES[args.category]
+    elif args.all_tasks:
+        task_names = []
+        for category_tasks in TASK_CATEGORIES.values():
+            task_names.extend(category_tasks)
+    else:
+        print("Error: Must specify --task, --tasks, --category, or --all-tasks")
+        sys.exit(1)
+
+    if len(task_names) == 1:
+        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
+    else:
+        entries = create_composite_dataset(task_names, args.size, args.seed, config)
+
+    if not entries:
+        print("Error: No entries generated")
+        sys.exit(1)
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    with open(output_path, "w") as f:
+        for entry in entries:
+            f.write(json.dumps(entry) + "\n")
+
+    print(f"\nDataset saved to {output_path}")
+    print(f"Total entries: {len(entries)}")
+
+
+if __name__ == "__main__":  # invoke the CLI only when run as a script, not on import
+    main()
diff --git a/environments/reasoning_gym_reflection/README.md b/environments/reasoning_gym_reflection/README.md
new file mode 100644
index 000000000..64f010ca2
--- /dev/null
+++ b/environments/reasoning_gym_reflection/README.md
@@ -0,0 +1,28 @@
+# Reasoning Gym — LangGraph Reflection Agent
+
+A LangGraph reflection agent for resource servers that do not use tools. It adds iterative reflection to produce diverse agent training data and enable test-time scaling, and it can be extended to use tools or other agent architectures.
+
+Source benchmark: https://github.com/open-thought/reasoning-gym
+
+## Quick start
+
+```bash
+ng_run "+config_paths=[environments/reasoning_gym_reflection/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_reflection_agent \
+    +input_jsonl_fpath=environments/reasoning_gym_reflection/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_reflection_rollouts.jsonl
+```
+
+## Prepare training data
+
+```bash
+python environments/reasoning_gym_reflection/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_reflection/data/train_knights_knaves.jsonl
+```
+
+See `prepare.py` for all available tasks, categories, and config options.
+
+Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1).
diff --git a/environments/reasoning_gym_reflection/__init__.py b/environments/reasoning_gym_reflection/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/reasoning_gym_reflection/config.yaml b/environments/reasoning_gym_reflection/config.yaml
new file mode 100644
index 000000000..9c95a502b
--- /dev/null
+++ b/environments/reasoning_gym_reflection/config.yaml
@@ -0,0 +1,28 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      description: LangGraph reflection agent compatible with resource servers that do not use tools; provides iterative reflection for diverse agent training data and test time scaling, extensible to use tools or other agent architectures
+      value: Iterative test time scaling for improved performance in reasoning tasks
+      verified: false
+
+reasoning_gym_reflection_agent:
+  responses_api_agents:
+    langgraph_agent:
+      entrypoint: reflection_agent.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      max_reflections: 2
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: environments/reasoning_gym_reflection/data/example.jsonl
+      - name: train
+        type: train
+        jsonl_fpath: environments/reasoning_gym_reflection/data/train_knights_knaves.jsonl
+        license: Apache 2.0
diff --git a/environments/reasoning_gym_reflection/data/.gitignore b/environments/reasoning_gym_reflection/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/reasoning_gym_reflection/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/reasoning_gym_reflection/data/example.jsonl b/environments/reasoning_gym_reflection/data/example.jsonl
new file mode 100644
index 000000000..533ef47c8
--- /dev/null
+++ b/environments/reasoning_gym_reflection/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/environments/reasoning_gym_reflection/prepare.py b/environments/reasoning_gym_reflection/prepare.py
new file mode 100644
index 000000000..2a5215813
--- /dev/null
+++ b/environments/reasoning_gym_reflection/prepare.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+# Single task with default config
+python environments/reasoning_gym_reflection/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl
+
+# Multiple tasks (creates composite dataset with equal weights)
+python environments/reasoning_gym_reflection/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl
+
+# All tasks in a category
+python environments/reasoning_gym_reflection/prepare.py --category logic --size 500 --output data/train.jsonl
+
+# All tasks from all categories
+python environments/reasoning_gym_reflection/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl
+
+# Single task with custom config
+python environments/reasoning_gym_reflection/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import reasoning_gym
+from reasoning_gym.composite import DatasetSpec
+
+
+TASK_CATEGORIES = {  # category name -> reasoning_gym task names; read by --category and --all-tasks
+    "logic": [
+        "knights_knaves",
+        "syllogism",
+        "propositional_logic",
+        "zebra_puzzles",
+        "aiw",
+        "circuit_logic",
+        "self_reference",
+    ],
+    "arithmetic": [
+        "basic_arithmetic",
+        "chain_sum",
+        "leg_counting",
+        "prime_factorization",
+        "gcd",
+        "lcm",
+        "time_intervals",
+        "calendar_arithmetic",
+        "dice",
+        "products",
+        "decimal_chain_sum",
+        "decimal_arithmetic",
+        "count_bits",
+        "bitwise_arithmetic",
+        "number_format",
+        "fraction_simplification",
+        "power_function",
+        "gsm_symbolic",
+    ],
+    "algebra": [
+        "simple_equations",
+        "polynomial_equations",
+        "polynomial_multiplication",
+        "simple_integration",
+        "intermediate_integration",
+        "complex_arithmetic",
+    ],
+    "algorithmic": [
+        "word_sorting",
+        "string_insertion",
+        "sentence_reordering",
+        "count_primes",
+        "binary_alternation",
+        "manipulate_matrix",
+        "pool_matrix",
+        "base_conversion",
+        "jugs",
+        "graph_color",
+        "spiral_matrix",
+        "cryptarithm",
+        "ab",
+        "spell_backward",
+        "letter_counting",
+        "letter_jumble",
+        "number_sorting",
+        "caesar_cipher",
+        "rotate_matrix",
+        "rotten_oranges",
+        "palindrome_generation",
+        "palindrome_partitioning",
+        "game_of_life",
+        "game_of_life_halting",
+        "group_anagrams",
+        "isomorphic_strings",
+        "string_synthesis",
+        "binary_matrix",
+        "word_sequence_reversal",
+        "number_filtering",
+        "word_ladder",
+        "string_manipulation",
+        "ransom_note",
+        "string_splitting",
+    ],
+    "cognition": [
+        "figlet_font",
+        "needle_haystack",
+        "modulo_grid",
+        "number_sequence",
+        "rubiks_cube",
+        "color_cube_rotation",
+        "rectangle_count",
+    ],
+    "arc": [
+        "arc_1d",
+        "arc_agi",
+        "rearc",
+    ],
+    "code": [
+        "bf",
+        "codeio",
+    ],
+    "games": [
+        "boxnet",
+        "countdown",
+        "emoji_mystery",
+        "futoshiki",
+        "kakurasu",
+        "knight_swap",
+        "mahjong_puzzle",
+        "maze",
+        "mini_sudoku",
+        "n_queens",
+        "puzzle24",
+        "rush_hour",
+        "sokoban",
+        "sudoku",
+        "survo",
+        "tower_of_hanoi",
+        "tsumego",
+    ],
+    "geometry": [
+        "advanced_geometry",
+        "simple_geometry",
+    ],
+    "graphs": [
+        "course_schedule",
+        "family_relationships",
+        "largest_island",
+        "quantum_lock",
+        "shortest_path",
+    ],
+    "induction": [
+        "acre",
+        "list_functions",
+    ],
+    "probability": [
+        "coin_flip",
+    ],
+}
+
+
+def format_entry_to_nemo_gym(entry: dict) -> dict:
+    """Wrap a reasoning_gym entry as a NeMo Gym row.
+
+    The row holds the question as a single user message, every original entry field, and the agent reference.
+    """
+    row = {"responses_create_params": {"input": [{"role": "user", "content": entry["question"]}]}}
+    row.update(entry)
+    return {**row, "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_reflection_agent"}}
+
+
+def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate up to `size` NeMo Gym entries for one reasoning_gym task.
+
+    `config` holds extra keyword arguments for the task's dataset; the caller's dict is left unmodified.
+    Entries that fail to generate are reported and skipped; returns [] if the dataset cannot be created.
+    """
+    params = {**(config or {}), "size": size, "seed": seed}  # fresh dict: never write into the caller's config
+    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
+    try:
+        dataset = reasoning_gym.create_dataset(task_name, **params)
+    except Exception as e:
+        print(f"Error creating dataset {task_name}: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:
+            print(f"Error generating entry {i} for {task_name}: {e}")
+
+    print(f"Generated {len(entries)} entries for {task_name}")
+    return entries
+
+
+def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate up to `size` NeMo Gym entries from an equally weighted mix of `task_names`.
+
+    `config` maps a task name to that task's dataset settings; tasks without a key get {}.
+    Entries that fail to generate are reported and skipped; returns [] if the dataset cannot be created.
+    """
+    per_task = config or {}
+    specs = [DatasetSpec(name=task, weight=1.0, config=per_task.get(task, {})) for task in task_names]
+
+    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
+    print(f"Total samples: {size}, seed: {seed}")
+    try:
+        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
+    except Exception as e:
+        print(f"Error creating composite dataset: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:
+            print(f"Error generating entry {i}: {e}")
+
+    task_counts = {}  # entries per source task, for the summary printed below
+    for row in entries:
+        source = row["metadata"]["source_dataset"]
+        task_counts[source] = task_counts.get(source, 0) + 1
+
+    print(f"\nGenerated {len(entries)} total entries")
+    print("Task distribution:")
+    for task, count in sorted(task_counts.items()):
+        print(f"  {task}: {count}")
+    return entries
+
+
+def main():
+    """Parse CLI arguments, generate the selected reasoning_gym dataset, and write it to --output as JSONL.
+
+    Exits with status 1 on an invalid --config, an empty task selection, or when no entries are generated.
+    """
+    parser = argparse.ArgumentParser(
+        description="Create reasoning_gym datasets in NeMo Gym format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+
+    task_group = parser.add_mutually_exclusive_group(required=True)
+    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
+    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
+    task_group.add_argument(
+        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
+    )
+    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")
+    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="{}",
+        help="Config as a JSON object: task settings for a single task (e.g., '{\"n_people\": 3}'), "
+        "or a mapping from task name to settings when several tasks are selected",
+    )
+    parser.add_argument(
+        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
+    )
+    args = parser.parse_args()
+
+    try:
+        config = json.loads(args.config)
+    except json.JSONDecodeError as e:
+        print(f"Error parsing config JSON: {e}")
+        sys.exit(1)
+    if not isinstance(config, dict):  # both dataset builders use the config as a mapping
+        print("Error: --config must be a JSON object")
+        sys.exit(1)
+
+    if args.task:
+        task_names = [args.task]
+    elif args.tasks:
+        task_names = [t.strip() for t in args.tasks.split(",") if t.strip()]  # skip empty names, e.g. "a,b,"
+    elif args.category:
+        task_names = TASK_CATEGORIES[args.category]
+    elif args.all_tasks:
+        task_names = [task for tasks in TASK_CATEGORIES.values() for task in tasks]
+    else:
+        print("Error: Must specify --task, --tasks, --category, or --all-tasks")
+        sys.exit(1)
+
+    if len(task_names) == 1:
+        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
+    else:
+        entries = create_composite_dataset(task_names, args.size, args.seed, config)
+    if not entries:
+        print("Error: No entries generated")
+        sys.exit(1)
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.writelines(json.dumps(entry) + "\n" for entry in entries)
+
+    print(f"\nDataset saved to {output_path}")
+    print(f"Total entries: {len(entries)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/environments/reasoning_gym_rewoo/README.md b/environments/reasoning_gym_rewoo/README.md
new file mode 100644
index 000000000..5b986aba5
--- /dev/null
+++ b/environments/reasoning_gym_rewoo/README.md
@@ -0,0 +1,28 @@
+# Reasoning Gym — LangGraph ReWOO Agent
+
+A LangGraph ReWOO agent for resource servers that do not use tools. Compared with a simple agent, it enables diverse agent training data and test-time scaling; it can also be extended to use tools or other agent architectures.
+
+Source benchmark: https://github.com/open-thought/reasoning-gym
+
+## Quick start
+
+```bash
+ng_run "+config_paths=[environments/reasoning_gym_rewoo/config.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_rewoo_agent \
+    +input_jsonl_fpath=environments/reasoning_gym_rewoo/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_rewoo_rollouts.jsonl
+```
+
+## Prepare training data
+
+```bash
+python environments/reasoning_gym_rewoo/prepare.py --task knights_knaves --size 1000 --output environments/reasoning_gym_rewoo/data/train_knights_knaves.jsonl
+```
+
+See `prepare.py` for all available tasks, categories, and config options.
+
+Alternatively, a pre-built dataset is hosted on HuggingFace at [nvidia/Nemotron-RL-ReasoningGym-v1](https://huggingface.co/datasets/nvidia/Nemotron-RL-ReasoningGym-v1).
diff --git a/environments/reasoning_gym_rewoo/__init__.py b/environments/reasoning_gym_rewoo/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/reasoning_gym_rewoo/config.yaml b/environments/reasoning_gym_rewoo/config.yaml
new file mode 100644
index 000000000..7db7b87a8
--- /dev/null
+++ b/environments/reasoning_gym_rewoo/config.yaml
@@ -0,0 +1,27 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      description: LangGraph ReWOO agent compatible with resource servers that do not use tools; enables diverse agent training data and test time scaling vs a simple agent, extensible to use tools or other agent architectures
+      value: Iterative test time scaling for improved performance in reasoning tasks
+      verified: false
+
+reasoning_gym_rewoo_agent:
+  responses_api_agents:
+    langgraph_agent:
+      entrypoint: rewoo_agent.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: example
+        type: example
+        jsonl_fpath: environments/reasoning_gym_rewoo/data/example.jsonl
+      - name: train
+        type: train
+        jsonl_fpath: environments/reasoning_gym_rewoo/data/train_knights_knaves.jsonl
+        license: Apache 2.0
diff --git a/environments/reasoning_gym_rewoo/data/.gitignore b/environments/reasoning_gym_rewoo/data/.gitignore
new file mode 100644
index 000000000..4424b6fde
--- /dev/null
+++ b/environments/reasoning_gym_rewoo/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/reasoning_gym_rewoo/data/example.jsonl b/environments/reasoning_gym_rewoo/data/example.jsonl
new file mode 100644
index 000000000..533ef47c8
--- /dev/null
+++ b/environments/reasoning_gym_rewoo/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/environments/reasoning_gym_rewoo/prepare.py b/environments/reasoning_gym_rewoo/prepare.py
new file mode 100644
index 000000000..82624393a
--- /dev/null
+++ b/environments/reasoning_gym_rewoo/prepare.py
@@ -0,0 +1,315 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+# Single task with default config
+python environments/reasoning_gym_rewoo/prepare.py --task knights_knaves --size 500 --seed 42 --output data/train.jsonl
+
+# Multiple tasks (creates composite dataset with equal weights)
+python environments/reasoning_gym_rewoo/prepare.py --tasks knights_knaves,syllogism --size 500 --output data/train.jsonl
+
+# All tasks in a category
+python environments/reasoning_gym_rewoo/prepare.py --category logic --size 500 --output data/train.jsonl
+
+# All tasks from all categories
+python environments/reasoning_gym_rewoo/prepare.py --all-tasks --size 5000 --output data/train_all.jsonl
+
+# Single task with custom config
+python environments/reasoning_gym_rewoo/prepare.py --task knights_knaves --size 500 --config '{"n_people": 3}' --output data/train.jsonl
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+import reasoning_gym
+from reasoning_gym.composite import DatasetSpec
+
+
+TASK_CATEGORIES = {  # category name -> reasoning_gym task names; read by --category and --all-tasks
+    "logic": [
+        "knights_knaves",
+        "syllogism",
+        "propositional_logic",
+        "zebra_puzzles",
+        "aiw",
+        "circuit_logic",
+        "self_reference",
+    ],
+    "arithmetic": [
+        "basic_arithmetic",
+        "chain_sum",
+        "leg_counting",
+        "prime_factorization",
+        "gcd",
+        "lcm",
+        "time_intervals",
+        "calendar_arithmetic",
+        "dice",
+        "products",
+        "decimal_chain_sum",
+        "decimal_arithmetic",
+        "count_bits",
+        "bitwise_arithmetic",
+        "number_format",
+        "fraction_simplification",
+        "power_function",
+        "gsm_symbolic",
+    ],
+    "algebra": [
+        "simple_equations",
+        "polynomial_equations",
+        "polynomial_multiplication",
+        "simple_integration",
+        "intermediate_integration",
+        "complex_arithmetic",
+    ],
+    "algorithmic": [
+        "word_sorting",
+        "string_insertion",
+        "sentence_reordering",
+        "count_primes",
+        "binary_alternation",
+        "manipulate_matrix",
+        "pool_matrix",
+        "base_conversion",
+        "jugs",
+        "graph_color",
+        "spiral_matrix",
+        "cryptarithm",
+        "ab",
+        "spell_backward",
+        "letter_counting",
+        "letter_jumble",
+        "number_sorting",
+        "caesar_cipher",
+        "rotate_matrix",
+        "rotten_oranges",
+        "palindrome_generation",
+        "palindrome_partitioning",
+        "game_of_life",
+        "game_of_life_halting",
+        "group_anagrams",
+        "isomorphic_strings",
+        "string_synthesis",
+        "binary_matrix",
+        "word_sequence_reversal",
+        "number_filtering",
+        "word_ladder",
+        "string_manipulation",
+        "ransom_note",
+        "string_splitting",
+    ],
+    "cognition": [
+        "figlet_font",
+        "needle_haystack",
+        "modulo_grid",
+        "number_sequence",
+        "rubiks_cube",
+        "color_cube_rotation",
+        "rectangle_count",
+    ],
+    "arc": [
+        "arc_1d",
+        "arc_agi",
+        "rearc",
+    ],
+    "code": [
+        "bf",
+        "codeio",
+    ],
+    "games": [
+        "boxnet",
+        "countdown",
+        "emoji_mystery",
+        "futoshiki",
+        "kakurasu",
+        "knight_swap",
+        "mahjong_puzzle",
+        "maze",
+        "mini_sudoku",
+        "n_queens",
+        "puzzle24",
+        "rush_hour",
+        "sokoban",
+        "sudoku",
+        "survo",
+        "tower_of_hanoi",
+        "tsumego",
+    ],
+    "geometry": [
+        "advanced_geometry",
+        "simple_geometry",
+    ],
+    "graphs": [
+        "course_schedule",
+        "family_relationships",
+        "largest_island",
+        "quantum_lock",
+        "shortest_path",
+    ],
+    "induction": [
+        "acre",
+        "list_functions",
+    ],
+    "probability": [
+        "coin_flip",
+    ],
+}
+
+
+def format_entry_to_nemo_gym(entry: dict) -> dict:
+    """Wrap a reasoning_gym entry as a NeMo Gym row.
+
+    The row holds the question as a single user message, every original entry field, and the agent reference.
+    """
+    row = {"responses_create_params": {"input": [{"role": "user", "content": entry["question"]}]}}
+    row.update(entry)
+    return {**row, "agent_ref": {"type": "responses_api_agents", "name": "reasoning_gym_rewoo_agent"}}
+
+
+def create_single_task_dataset(task_name: str, size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate up to `size` NeMo Gym entries for one reasoning_gym task.
+
+    `config` holds extra keyword arguments for the task's dataset; the caller's dict is left unmodified.
+    Entries that fail to generate are reported and skipped; returns [] if the dataset cannot be created.
+    """
+    params = {**(config or {}), "size": size, "seed": seed}  # fresh dict: never write into the caller's config
+    print(f"Creating {task_name} dataset with {size} samples (seed={seed})...")
+    try:
+        dataset = reasoning_gym.create_dataset(task_name, **params)
+    except Exception as e:
+        print(f"Error creating dataset {task_name}: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:
+            print(f"Error generating entry {i} for {task_name}: {e}")
+
+    print(f"Generated {len(entries)} entries for {task_name}")
+    return entries
+
+
+def create_composite_dataset(task_names: list[str], size: int, seed: int, config: dict = None) -> list[dict]:
+    """Generate up to `size` NeMo Gym entries from an equally weighted mix of `task_names`.
+
+    `config` maps a task name to that task's dataset settings; tasks without a key get {}.
+    Entries that fail to generate are reported and skipped; returns [] if the dataset cannot be created.
+    """
+    per_task = config or {}
+    specs = [DatasetSpec(name=task, weight=1.0, config=per_task.get(task, {})) for task in task_names]
+
+    print(f"Creating composite dataset with {len(task_names)} tasks: {', '.join(task_names)}")
+    print(f"Total samples: {size}, seed: {seed}")
+    try:
+        dataset = reasoning_gym.create_dataset("composite", size=size, seed=seed, datasets=specs)
+    except Exception as e:
+        print(f"Error creating composite dataset: {e}")
+        return []
+
+    entries = []
+    for i in range(size):
+        try:
+            entries.append(format_entry_to_nemo_gym(dataset[i]))
+        except Exception as e:
+            print(f"Error generating entry {i}: {e}")
+
+    task_counts = {}  # entries per source task, for the summary printed below
+    for row in entries:
+        source = row["metadata"]["source_dataset"]
+        task_counts[source] = task_counts.get(source, 0) + 1
+
+    print(f"\nGenerated {len(entries)} total entries")
+    print("Task distribution:")
+    for task, count in sorted(task_counts.items()):
+        print(f"  {task}: {count}")
+    return entries
+
+
+def main():
+    """Parse CLI arguments, generate the selected reasoning_gym dataset, and write it to --output as JSONL.
+
+    Exits with status 1 on an invalid --config, an empty task selection, or when no entries are generated.
+    """
+    parser = argparse.ArgumentParser(
+        description="Create reasoning_gym datasets in NeMo Gym format",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=__doc__,
+    )
+
+    task_group = parser.add_mutually_exclusive_group(required=True)
+    task_group.add_argument("--task", type=str, help="Single task name (e.g., knights_knaves)")
+    task_group.add_argument("--tasks", type=str, help="Comma-separated list of tasks")
+    task_group.add_argument(
+        "--category", type=str, choices=list(TASK_CATEGORIES.keys()), help="Task category (e.g., logic, arithmetic)"
+    )
+    task_group.add_argument("--all-tasks", action="store_true", help="Use all tasks from all categories")
+    parser.add_argument("--size", type=int, default=500, help="Number of samples to generate (default: 500)")
+    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
+    parser.add_argument(
+        "--config",
+        type=str,
+        default="{}",
+        help="Config as a JSON object: task settings for a single task (e.g., '{\"n_people\": 3}'), "
+        "or a mapping from task name to settings when several tasks are selected",
+    )
+    parser.add_argument(
+        "--output", type=str, required=True, help="Output JSONL file path (e.g., data/train_knights_knaves.jsonl)"
+    )
+    args = parser.parse_args()
+
+    try:
+        config = json.loads(args.config)
+    except json.JSONDecodeError as e:
+        print(f"Error parsing config JSON: {e}")
+        sys.exit(1)
+    if not isinstance(config, dict):  # both dataset builders use the config as a mapping
+        print("Error: --config must be a JSON object")
+        sys.exit(1)
+
+    if args.task:
+        task_names = [args.task]
+    elif args.tasks:
+        task_names = [t.strip() for t in args.tasks.split(",") if t.strip()]  # skip empty names, e.g. "a,b,"
+    elif args.category:
+        task_names = TASK_CATEGORIES[args.category]
+    elif args.all_tasks:
+        task_names = [task for tasks in TASK_CATEGORIES.values() for task in tasks]
+    else:
+        print("Error: Must specify --task, --tasks, --category, or --all-tasks")
+        sys.exit(1)
+
+    if len(task_names) == 1:
+        entries = create_single_task_dataset(task_names[0], args.size, args.seed, config)
+    else:
+        entries = create_composite_dataset(task_names, args.size, args.seed, config)
+    if not entries:
+        print("Error: No entries generated")
+        sys.exit(1)
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.writelines(json.dumps(entry) + "\n" for entry in entries)
+
+    print(f"\nDataset saved to {output_path}")
+    print(f"Total entries: {len(entries)}")
+
+
+if __name__ == "__main__":
+    main()