NVIDIA-NeMo · cmunley1 · May 29, 2026 · May 29, 2026
diff --git a/environments/code_gen/README.md b/environments/code_gen/README.md
@@ -0,0 +1,39 @@
+# Competitive Coding Resources Server
+
+### Overview
+Verifies competitive programming solutions by executing submitted code against unit tests. The server consumes agent trajectories and returns a reward based on whether the assistant's code produces the correct outputs for given test inputs.
+
+The full dataset is still in preparation; example data can be found in `data/example.jsonl`.
+
+### Input schema
+- `responses_create_params`: OpenAI Responses create params
+  - Use only a user message with the problem statement and instructions (e.g., "You are an expert competitive programmer...").
+  - `verifier_metadata` (required):
+    - `unit_tests` (required): dict with `inputs` and `outputs` arrays containing test cases.
+      - `inputs`: list of strings representing stdin input for each test case
+      - `outputs`: list of strings representing expected stdout output for each test case
+
+**Notes**
+- All test cases must pass for a solution to receive a reward of 1.0
+- Any failed test case results in a reward of 0.0, with detailed error information
+
+### Test execution (for now)
+We use the LiveCodeBench execution code.
+
+### Example of rollouts and usage
+
+```bash
+# Running the server
+config_paths="responses_api_models/openai_model/configs/openai_model.yaml,\
+environments/code_gen/config.yaml"
+ng_run "+config_paths=[$config_paths]"
+
+# Collect rollouts from example problems
+ng_collect_rollouts +agent_name=code_gen_simple_agent \
+    +input_jsonl_fpath=environments/code_gen/data/example.jsonl \
+    +output_jsonl_fpath=environments/code_gen/data/example_rollouts.jsonl \
+    +limit=null
+```
+
+## Licensing information
+Apache 2.0
diff --git a/environments/code_gen/__init__.py b/environments/code_gen/__init__.py
diff --git a/environments/code_gen/config.yaml b/environments/code_gen/config.yaml
@@ -0,0 +1,42 @@
+code_gen:
+  resources_servers:
+    code_gen:
+      entrypoint: app.py
+      domain: coding
+      verified: true
+      verified_url: https://wandb.ai/nvidia/bxyu-nemo-gym-rl-integration-20250926/runs/54uzarwq
+      description: Model must submit the right code to solve a problem
+      value: Improve competitive coding capabilities
+      num_processes: 8
+      unit_test_timeout_secs: 10
+      debug: false
+code_gen_simple_agent:
+  responses_api_agents:
+    simple_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: code_gen
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: opencodereasoning_filtered_train
+        type: train
+        jsonl_fpath: environments/code_gen/data/opencodereasoning_filtered_25k_train.jsonl
+        huggingface_identifier:
+          repo_id: nvidia/nemotron-RL-coding-competitive_coding
+          artifact_fpath: opencodereasoning_filtered_25k_train.jsonl
+        license: Apache 2.0
+        num_repeats: 1
+      - name: livecodebench_v5_validation
+        type: validation
+        jsonl_fpath: environments/code_gen/data/livecodebench_v5_2024-07-01_2025-02-01_validation.jsonl
+        huggingface_identifier:
+          repo_id: nvidia/nemotron-RL-coding-competitive_coding
+          artifact_fpath: validation.jsonl
+        license: MIT
+        num_repeats: 10  # Use livecodebench setting, even if sampling parameters differ.
+      - name: example
+        type: example
+        jsonl_fpath: environments/code_gen/data/example.jsonl
diff --git a/environments/code_gen/data/.gitignore b/environments/code_gen/data/.gitignore
@@ -0,0 +1,5 @@
+*train.jsonl
+*validation.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/code_gen/data/example.jsonl b/environments/code_gen/data/example.jsonl
diff --git a/environments/code_gen/data/example_metrics.json b/environments/code_gen/data/example_metrics.json
@@ -0,0 +1,49 @@
+{
+    "name": "example",
+    "type": "example",
+    "jsonl_fpath": "environments/code_gen/data/example.jsonl",
+    "num_repeats": 1,
+    "gitlab_identifier": null,
+    "license": null,
+    "Number of examples": 5,
+    "Number of tools": {
+        "Total # non-null values": 0,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Standard deviation": 0.0
+    },
+    "Json-dumped number of words (proxy for token count)": {
+        "Total # non-null values": 5,
+        "Average": 468.0,
+        "Min": 359.0,
+        "Max": 553.0,
+        "Standard deviation": 79.76
+    },
+    "Number of turns": {
+        "Total # non-null values": 5,
+        "Average": 1.0,
+        "Min": 1.0,
+        "Max": 1.0,
+        "Standard deviation": 0.0
+    },
+    "Temperature": {
+        "Total # non-null values": 0,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Standard deviation": 0.0
+    },
+    "hash_id": {
+        "unique_count": 5,
+        "total_count": 5
+    },
+    "dataset": {
+        "unique_count": 1,
+        "total_count": 5
+    },
+    "source": {
+        "unique_count": 1,
+        "total_count": 5
+    }
+}
diff --git a/environments/code_gen/data/example_rollouts.jsonl b/environments/code_gen/data/example_rollouts.jsonl
diff --git a/environments/code_gen/prepare.py b/environments/code_gen/prepare.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2026, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Download competitive coding training and validation data from HuggingFace.
+
+Usage:
+    python environments/code_gen/prepare.py                     # same as --split train
+    python environments/code_gen/prepare.py --split train
+    python environments/code_gen/prepare.py --split validation
+"""
+
+import argparse
+from pathlib import Path
+
+
+# HuggingFace dataset repository the artifacts are downloaded from.
+REPO_ID = "nvidia/nemotron-RL-coding-competitive_coding"
+
+# Split name -> file name written under this script's "data" directory.
+# The keys also serve as the accepted values of the --split CLI option.
+SPLITS = {
+    "train": "opencodereasoning_filtered_25k_train.jsonl",
+    "validation": "livecodebench_v5_2024-07-01_2025-02-01_validation.jsonl",
+}
+
+# Split name -> file name requested from REPO_ID. It may differ from the local
+# name in SPLITS (the validation artifact is saved locally under a longer name).
+ARTIFACTS = {
+    "train": "opencodereasoning_filtered_25k_train.jsonl",
+    "validation": "validation.jsonl",
+}
+
+
+def prepare(split: str) -> None:
+    try:
+        from huggingface_hub import hf_hub_download
+    except ImportError:
+        raise ImportError("pip install huggingface_hub")
+
+    output_path = Path(__file__).parent / "data" / SPLITS[split]
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    downloaded = hf_hub_download(repo_id=REPO_ID, filename=ARTIFACTS[split], repo_type="dataset")
+    Path(downloaded).rename(output_path)
+    print(f"Wrote {output_path}")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--split", default="train", choices=list(SPLITS))
+    args = parser.parse_args()
+    prepare(args.split)
+
+
+if __name__ == "__main__":
+    main()