diff --git a/environments/abstention/README.md b/environments/abstention/README.md new file mode 100644 index 000000000..cd68ed897 --- /dev/null +++ b/environments/abstention/README.md @@ -0,0 +1,50 @@ +# Description + +This is an environment that trains a policy model to abstain from answering when unsure rather than hallucinating. It uses a three-tier reward scheme: + +- **Correct** (1.0): The model provides a correct answer +- **Abstain** (configurable, default 0.5): The model outputs `\boxed{[IDK]}` or the LLM judge grades the answer as NOT_ATTEMPTED +- **Incorrect** (0.0): The model provides an incorrect answer + +Correctness is verified by an LLM judge using the OMNISCIENCE_GRADER template instead of string matching. The judge grades the model's extracted answer against the gold target as one of CORRECT, INCORRECT, or NOT_ATTEMPTED. + +The dataset used is [HotPotQA](https://hotpotqa.github.io/) (fullwiki split). + +# Example usage + +## Running servers + +```bash +config_paths="responses_api_models/openai_model/configs/openai_model.yaml, \ +environments/abstention/config.yaml" +ng_run "+config_paths=[$config_paths]" \ + +abstention.resources_servers.abstention.judge_model_server.name=policy_model +``` + +## Collecting rollouts + +```bash +ng_collect_rollouts \ + +agent_name=abstention_simple_agent \ + +input_jsonl_fpath=environments/abstention/data/example.jsonl \ + +output_jsonl_fpath=results/abstention_verify_responses.jsonl \ + +limit=3 +``` + +## Preprocessing HotPotQA data + +```bash +python environments/abstention/prepare.py \ + --download \ + --raw-data-dir /path/to/data/hotpotqa \ + --output-dir environments/abstention/data +``` + +# Licensing information + +Code: Apache 2.0 +Data: +- HotPotQA: Creative Commons Attribution-ShareAlike 4.0 International + +Dependencies: +- nemo_gym: Apache 2.0 diff --git a/environments/abstention/__init__.py b/environments/abstention/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/environments/abstention/config.yaml b/environments/abstention/config.yaml new file mode 100644 index 000000000..fa5281758 --- /dev/null +++ b/environments/abstention/config.yaml @@ -0,0 +1,57 @@ +# Abstention Environment +# Trains a policy model to abstain from answering when unsure rather than +# hallucinating. Uses a three-tier reward: correct (1.0) > abstain (lambda) > +# incorrect (0.0). Verification uses an LLM judge (OMNISCIENCE_GRADER) instead +# of string matching. + +abstention: + resources_servers: + abstention: + entrypoint: app.py + domain: rlhf + verified: false + description: Train models to abstain when unsure using three-tier reward on HotPotQA with LLM judge + value: Improve calibration by rewarding abstention over incorrect answers + + # LLM judge model server + judge_model_server: + type: responses_api_models + name: genrm_model + judge_responses_create_params: + input: [] + max_output_tokens: 64 + temperature: 0.0 + top_p: 1.0 + + # Reward for abstaining (lambda). Must satisfy 0 < lambda < 1. + abstention_reward: 0.5 + + # Token the model outputs inside \boxed{} to signal abstention + abstention_token: "[IDK]" + + # Reward for correct and incorrect answers + correct_reward: 1.0 + incorrect_reward: 0.0 + +abstention_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: abstention + model_server: + type: responses_api_models + name: policy_model + datasets: + - name: hotpotqa_train + type: train + jsonl_fpath: environments/abstention/data/hotpotqa_train.jsonl + license: Creative Commons Attribution-ShareAlike 4.0 International + - name: hotpotqa_val + type: validation + jsonl_fpath: environments/abstention/data/hotpotqa_val.jsonl + license: Creative Commons Attribution-ShareAlike 4.0 International + - name: example + type: example + jsonl_fpath: environments/abstention/data/example.jsonl diff --git a/environments/abstention/data/.gitignore b/environments/abstention/data/.gitignore new file mode 100644 index 000000000..cf42462ab --- /dev/null +++ b/environments/abstention/data/.gitignore @@ -0,0 +1,6 @@ +*train.jsonl +*validation.jsonl +*val.jsonl +*train_prepare.jsonl +*validation_prepare.jsonl +*example_prepare.jsonl diff --git a/environments/abstention/data/example.jsonl b/environments/abstention/data/example.jsonl new file mode 100644 index 000000000..5dd7b275d --- /dev/null +++ b/environments/abstention/data/example.jsonl @@ -0,0 +1,5 @@ +{"id": "example_0", "question": "Were Scott Derrickson and Ed Wood of the same nationality?", "answer": "Yes", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Were Scott Derrickson and Ed Wood of the same nationality?"}], "tools": [], "parallel_tool_calls": false}} +{"id": "example_1", "question": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?", "answer": "United Nations Goodwill Ambassador", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?"}], "tools": [], "parallel_tool_calls": false}} +{"id": "example_2", "question": "Which magazine was started first, Arthur's Magazine or First for Women?", "answer": "Arthur's Magazine", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Which magazine was started first, Arthur's Magazine or First for Women?"}], "tools": [], "parallel_tool_calls": false}} +{"id": "example_3", "question": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?", "answer": "Greenwich Village", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?"}], "tools": [], "parallel_tool_calls": false}} +{"id": "example_4", "question": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?", "answer": "Ransen Kamangar", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?"}], "tools": [], "parallel_tool_calls": false}} diff --git a/environments/abstention/data/example_metrics.json b/environments/abstention/data/example_metrics.json new file mode 100644 index 000000000..8e79cdb0e --- /dev/null +++ b/environments/abstention/data/example_metrics.json @@ -0,0 +1,50 @@ +{ + "name": "example", + "type": "example", + "jsonl_fpath": "resources_servers/abstention/data/example.jsonl", + "num_repeats": 1, + "gitlab_identifier": null, + "huggingface_identifier": null, + "license": null, + "Number of examples": 5, + "Number of tools": { + "Total # non-null values": 5, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "Json-dumped number of words (proxy for token count)": { + "Total # non-null values": 5, + "Average": 78.2, + "Min": 75.0, + "Max": 82.0, + "Standard deviation": 2.86 + }, + "Number of turns": { + "Total # non-null values": 5, + "Average": 1.0, + "Min": 1.0, + "Max": 1.0, + "Standard deviation": 0.0 + }, + "Temperature": { + "Total # non-null values": 0, + "Average": 0.0, + "Min": 0.0, + "Max": 0.0, + "Standard deviation": 0.0 + }, + "id": { + "unique_count": 5, + "total_count": 5 + }, + "question": { + "unique_count": 5, + "total_count": 5 + }, + "answer": { + "unique_count": 5, + "total_count": 5 + } +} \ No newline at end of file diff --git a/environments/abstention/data/example_rollouts.jsonl b/environments/abstention/data/example_rollouts.jsonl new file mode 100644 index 000000000..ccf55a8e5 --- /dev/null +++ b/environments/abstention/data/example_rollouts.jsonl @@ -0,0 +1,5 @@ +{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Were Scott Derrickson and Ed Wood of the same nationality?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_0", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_0", "content": [{"annotations": [], "text": "\\boxed{Yes}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_0", "question": "Were Scott Derrickson and Ed Wood of the same nationality?", "answer": "Yes", "extracted_answer": "Yes", "ground_truth": "Yes", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0} +{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_1", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_1", "content": [{"annotations": [], "text": "\\boxed{United Nations Goodwill Ambassador}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_1", "question": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?", "answer": "United Nations Goodwill Ambassador", "extracted_answer": "United Nations Goodwill Ambassador", "ground_truth": "United Nations Goodwill Ambassador", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0} +{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Which magazine was started first, Arthur's Magazine or First for Women?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_2", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_2", "content": [{"annotations": [], "text": "\\boxed{Arthur's Magazine}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_2", "question": "Which magazine was started first, Arthur's Magazine or First for Women?", "answer": "Arthur's Magazine", "extracted_answer": "Arthur's Magazine", "ground_truth": "Arthur's Magazine", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0} +{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_3", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_3", "content": [{"annotations": [], "text": "\\boxed{Greenwich Village}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_3", "question": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?", "answer": "Greenwich Village", "extracted_answer": "Greenwich Village", "ground_truth": "Greenwich Village", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0} +{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_4", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_4", "content": [{"annotations": [], "text": "\\boxed{[IDK]}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 0.5, "id": "example_4", "question": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?", "answer": "Ransen Kamangar", "extracted_answer": "[IDK]", "ground_truth": "Ransen Kamangar", "verdict": "abstain", "judge_output": null, "is_correct": 0.0, "is_abstain": 1.0, "is_incorrect": 0.0, "omniscience_index": 0.0} diff --git a/environments/abstention/prepare.py b/environments/abstention/prepare.py new file mode 100644 index 000000000..0221ee442 --- /dev/null +++ b/environments/abstention/prepare.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Preprocess HotPotQA into JSONL for the abstention environment. + +Downloads the HotPotQA dataset from HuggingFace if --download is passed, +then converts the raw data into the JSONL format expected by the +abstention resources server. This version: + + - Includes the ground-truth ``answer`` field in every record (needed by + the LLM judge). + - Injects a system prompt instructing the model to use \\boxed{} format + and \\boxed{[IDK]} when unsure. + +Usage: + # Download and preprocess in one step + python dataset_preprocess.py \\ + --download \\ + --raw-data-dir /path/to/data/hotpotqa \\ + --output-dir ./data + + # Preprocess only (data already downloaded) + python dataset_preprocess.py \\ + --raw-data-dir /path/to/data/hotpotqa \\ + --output-dir ./data +""" + +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + + +SYSTEM_PROMPT = ( + "You will be given a question, answer with JUST the answer (no explanation).\n" + "Put your final answer in \\boxed{your answer} format.\n" + "If you do not know the answer, or you need more context or tools to answer the question,\n" + "output \\boxed{[IDK]} instead of guessing.\n" + "It is better to not provide an answer when unsure than to be wrong." +) + + +def download_hotpotqa(raw_data_dir: str, splits: list[str] | None = None) -> None: + """Download HotPotQA fullwiki from HuggingFace and save as JSONL.""" + from datasets import load_dataset + + os.makedirs(raw_data_dir, exist_ok=True) + splits = splits or ["train", "validation"] + + for split in splits: + out_path = os.path.join(raw_data_dir, f"fullwiki_{split}.jsonl") + if os.path.exists(out_path): + print(f" {out_path} already exists, skipping download.") + continue + print(f" Downloading HotPotQA fullwiki {split}...") + ds = load_dataset("hotpotqa/hotpot_qa", "fullwiki", split=split, cache_dir=raw_data_dir) + ds.to_json(out_path) + print(f" Saved {len(ds)} records to {out_path}") + + +def build_record(row: dict, idx: int) -> dict: + """Convert a single HotPotQA row into an abstention record.""" + question = row["question"] + answer = row["answer"] + record_id = row.get("id", idx) + + return { + "id": record_id, + "question": question, + "answer": answer, + "agent_ref": { + "type": "responses_api_agents", + "name": "abstention_simple_agent", + }, + "responses_create_params": { + "input": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": question}, + ], + "tools": [], + "parallel_tool_calls": False, + }, + } + + +def preprocess_split( + raw_jsonl_path: Path, + output_path: Path, + max_samples: int | None = None, +) -> int: + """Read a raw HotPotQA JSONL and write the preprocessed version.""" + count = 0 + with open(raw_jsonl_path, "r", encoding="utf-8") as fin, open(output_path, "w", encoding="utf-8") as fout: + for idx, line in enumerate(fin): + if max_samples is not None and count >= max_samples: + break + row = json.loads(line) + record = build_record(row, idx) + fout.write(json.dumps(record, ensure_ascii=False) + "\n") + count += 1 + return count + + +def main(): + parser = argparse.ArgumentParser(description="Preprocess HotPotQA for the abstention environment") + parser.add_argument( + "--download", + action="store_true", + help="Download the HotPotQA dataset from HuggingFace first.", + ) + parser.add_argument( + "--raw-data-dir", + type=Path, + required=True, + help="Directory containing (or to download to) raw HotPotQA JSONL files.", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=Path(__file__).parent / "data", + help="Output directory for preprocessed JSONL files (default: ./data).", + ) + parser.add_argument( + "--max-train-samples", + type=int, + default=None, + help="Limit the number of training samples (default: use all).", + ) + parser.add_argument( + "--max-val-samples", + type=int, + default=None, + help="Limit the number of validation samples (default: use all).", + ) + args = parser.parse_args() + + if args.download: + print("Step 1: Downloading HotPotQA from HuggingFace...") + download_hotpotqa(str(args.raw_data_dir)) + else: + print("Skipping download (use --download to fetch from HuggingFace).") + + args.output_dir.mkdir(parents=True, exist_ok=True) + + splits = [ + ("fullwiki_train.jsonl", "hotpotqa_train.jsonl", args.max_train_samples), + ("fullwiki_validation.jsonl", "hotpotqa_val.jsonl", args.max_val_samples), + ] + + for raw_name, out_name, max_samples in splits: + raw_path = args.raw_data_dir / raw_name + out_path = args.output_dir / out_name + if not raw_path.exists(): + print(f"Warning: {raw_path} not found, skipping.") + continue + print(f"Preprocessing {raw_path} -> {out_path}...") + count = preprocess_split(raw_path, out_path, max_samples) + print(f" Wrote {count} records to {out_path}") + + print("\nDone.") + + +if __name__ == "__main__": + main()