NVIDIA-NeMo · cmunley1 · May 29, 2026 · May 29, 2026 · May 29, 2026
diff --git a/environments/abstention/README.md b/environments/abstention/README.md
@@ -0,0 +1,50 @@
+# Description
+
+This is an environment that trains a policy model to abstain from answering when unsure rather than hallucinating. It uses a three-tier reward scheme:
+
+- **Correct** (1.0): The model provides a correct answer
+- **Abstain** (configurable, default 0.5): The model outputs `\boxed{[IDK]}` or the LLM judge grades the answer as NOT_ATTEMPTED
+- **Incorrect** (0.0): The model provides an incorrect answer
+
+Correctness is verified by an LLM judge using the OMNISCIENCE_GRADER template instead of string matching. The judge grades the model's extracted answer against the gold target as one of CORRECT, INCORRECT, or NOT_ATTEMPTED.
+
+The dataset used is [HotPotQA](https://hotpotqa.github.io/) (fullwiki split).
+
+# Example usage
+
+## Running servers
+
+```bash
+config_paths="responses_api_models/openai_model/configs/openai_model.yaml, \
+environments/abstention/config.yaml"
+ng_run "+config_paths=[$config_paths]" \
+    +abstention.resources_servers.abstention.judge_model_server.name=policy_model
+```
+
+## Collecting rollouts
+
+```bash
+ng_collect_rollouts \
+    +agent_name=abstention_simple_agent \
+    +input_jsonl_fpath=environments/abstention/data/example.jsonl \
+    +output_jsonl_fpath=results/abstention_verify_responses.jsonl \
+    +limit=3
+```
+
+## Preprocessing HotPotQA data
+
+```bash
+python environments/abstention/prepare.py \
+    --download \
+    --raw-data-dir /path/to/data/hotpotqa \
+    --output-dir environments/abstention/data
+```
+
+# Licensing information
+
+Code: Apache 2.0
+Data:
+- HotPotQA: Creative Commons Attribution-ShareAlike 4.0 International
+
+Dependencies:
+- nemo_gym: Apache 2.0
diff --git a/environments/abstention/__init__.py b/environments/abstention/__init__.py
diff --git a/environments/abstention/config.yaml b/environments/abstention/config.yaml
@@ -0,0 +1,57 @@
+# Abstention Environment
+# Trains a policy model to abstain from answering when unsure rather than
+# hallucinating.  Uses a three-tier reward: correct (1.0) > abstain (lambda) >
+# incorrect (0.0).  Verification uses an LLM judge (OMNISCIENCE_GRADER) instead
+# of string matching.
+
+abstention:
+  resources_servers:
+    abstention:
+      entrypoint: app.py
+      domain: rlhf
+      verified: false
+      description: Train models to abstain when unsure using three-tier reward on HotPotQA with LLM judge
+      value: Improve calibration by rewarding abstention over incorrect answers
+
+      # LLM judge model server
+      judge_model_server:
+        type: responses_api_models
+        name: genrm_model
+      judge_responses_create_params:
+        input: []
+        max_output_tokens: 64
+        temperature: 0.0
+        top_p: 1.0
+
+      # Reward for abstaining (lambda). Must satisfy 0 < lambda < 1.
+      abstention_reward: 0.5
+
+      # Token the model outputs inside \boxed{} to signal abstention
+      abstention_token: "[IDK]"
+
+      # Reward for correct and incorrect answers
+      correct_reward: 1.0
+      incorrect_reward: 0.0
+
+abstention_simple_agent:
+  responses_api_agents:
+    simple_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: abstention
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+        - name: hotpotqa_train
+          type: train
+          jsonl_fpath: environments/abstention/data/hotpotqa_train.jsonl
+          license: Creative Commons Attribution-ShareAlike 4.0 International
+        - name: hotpotqa_val
+          type: validation
+          jsonl_fpath: environments/abstention/data/hotpotqa_val.jsonl
+          license: Creative Commons Attribution-ShareAlike 4.0 International
+        - name: example
+          type: example
+          jsonl_fpath: environments/abstention/data/example.jsonl
diff --git a/environments/abstention/data/.gitignore b/environments/abstention/data/.gitignore
@@ -0,0 +1,6 @@
+*train.jsonl
+*validation.jsonl
+*val.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/abstention/data/example.jsonl b/environments/abstention/data/example.jsonl
@@ -0,0 +1,5 @@
+{"id": "example_0", "question": "Were Scott Derrickson and Ed Wood of the same nationality?", "answer": "Yes", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Were Scott Derrickson and Ed Wood of the same nationality?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_1", "question": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?", "answer": "United Nations Goodwill Ambassador", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_2", "question": "Which magazine was started first, Arthur's Magazine or First for Women?", "answer": "Arthur's Magazine", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Which magazine was started first, Arthur's Magazine or First for Women?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_3", "question": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?", "answer": "Greenwich Village", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_4", "question": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?", "answer": "Ransen Kamangar", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?"}], "tools": [], "parallel_tool_calls": false}}
diff --git a/environments/abstention/data/example_metrics.json b/environments/abstention/data/example_metrics.json
@@ -0,0 +1,50 @@
+{
+    "name": "example",
+    "type": "example",
+    "jsonl_fpath": "resources_servers/abstention/data/example.jsonl",
+    "num_repeats": 1,
+    "gitlab_identifier": null,
+    "huggingface_identifier": null,
+    "license": null,
+    "Number of examples": 5,
+    "Number of tools": {
+        "Total # non-null values": 5,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Standard deviation": 0.0
+    },
+    "Json-dumped number of words (proxy for token count)": {
+        "Total # non-null values": 5,
+        "Average": 78.2,
+        "Min": 75.0,
+        "Max": 82.0,
+        "Standard deviation": 2.86
+    },
+    "Number of turns": {
+        "Total # non-null values": 5,
+        "Average": 1.0,
+        "Min": 1.0,
+        "Max": 1.0,
+        "Standard deviation": 0.0
+    },
+    "Temperature": {
+        "Total # non-null values": 0,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Standard deviation": 0.0
+    },
+    "id": {
+        "unique_count": 5,
+        "total_count": 5
+    },
+    "question": {
+        "unique_count": 5,
+        "total_count": 5
+    },
+    "answer": {
+        "unique_count": 5,
+        "total_count": 5
+    }
+}
diff --git a/environments/abstention/data/example_rollouts.jsonl b/environments/abstention/data/example_rollouts.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Were Scott Derrickson and Ed Wood of the same nationality?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_0", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_0", "content": [{"annotations": [], "text": "\\boxed{Yes}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_0", "question": "Were Scott Derrickson and Ed Wood of the same nationality?", "answer": "Yes", "extracted_answer": "Yes", "ground_truth": "Yes", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_1", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_1", "content": [{"annotations": [], "text": "\\boxed{United Nations Goodwill Ambassador}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_1", "question": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?", "answer": "United Nations Goodwill Ambassador", "extracted_answer": "United Nations Goodwill Ambassador", "ground_truth": "United Nations Goodwill Ambassador", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Which magazine was started first, Arthur's Magazine or First for Women?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_2", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_2", "content": [{"annotations": [], "text": "\\boxed{Arthur's Magazine}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_2", "question": "Which magazine was started first, Arthur's Magazine or First for Women?", "answer": "Arthur's Magazine", "extracted_answer": "Arthur's Magazine", "ground_truth": "Arthur's Magazine", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_3", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_3", "content": [{"annotations": [], "text": "\\boxed{Greenwich Village}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_3", "question": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?", "answer": "Greenwich Village", "extracted_answer": "Greenwich Village", "ground_truth": "Greenwich Village", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_4", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_4", "content": [{"annotations": [], "text": "\\boxed{[IDK]}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 0.5, "id": "example_4", "question": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?", "answer": "Ransen Kamangar", "extracted_answer": "[IDK]", "ground_truth": "Ransen Kamangar", "verdict": "abstain", "judge_output": null, "is_correct": 0.0, "is_abstain": 1.0, "is_incorrect": 0.0, "omniscience_index": 0.0}