diff --git a/environments/abstention/README.md b/environments/abstention/README.md
new file mode 100644
index 000000000..cd68ed897
--- /dev/null
+++ b/environments/abstention/README.md
@@ -0,0 +1,50 @@
+# Description
+
+This is an environment that trains a policy model to abstain from answering when unsure rather than hallucinating. It uses a three-tier reward scheme:
+
+- **Correct** (1.0): The model provides a correct answer
+- **Abstain** (configurable, default 0.5): The model outputs `\boxed{[IDK]}` or the LLM judge grades the answer as NOT_ATTEMPTED
+- **Incorrect** (0.0): The model provides an incorrect answer
+
+Correctness is verified by an LLM judge using the OMNISCIENCE_GRADER template instead of string matching. The judge grades the model's extracted answer against the gold target as one of CORRECT, INCORRECT, or NOT_ATTEMPTED.
+
+The dataset used is [HotPotQA](https://hotpotqa.github.io/) (fullwiki split).
+
+# Example usage
+
+## Running servers
+
+```bash
+config_paths="responses_api_models/openai_model/configs/openai_model.yaml, \
+environments/abstention/config.yaml"
+ng_run "+config_paths=[$config_paths]" \
+    +abstention.resources_servers.abstention.judge_model_server.name=policy_model
+```
+
+## Collecting rollouts
+
+```bash
+ng_collect_rollouts \
+    +agent_name=abstention_simple_agent \
+    +input_jsonl_fpath=environments/abstention/data/example.jsonl \
+    +output_jsonl_fpath=results/abstention_verify_responses.jsonl \
+    +limit=3
+```
+
+## Preprocessing HotPotQA data
+
+```bash
+python environments/abstention/prepare.py \
+    --download \
+    --raw-data-dir /path/to/data/hotpotqa \
+    --output-dir environments/abstention/data
+```
+
+# Licensing information
+
+Code: Apache 2.0
+Data:
+- HotPotQA: Creative Commons Attribution-ShareAlike 4.0 International
+
+Dependencies:
+- nemo_gym: Apache 2.0
diff --git a/environments/abstention/__init__.py b/environments/abstention/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/environments/abstention/config.yaml b/environments/abstention/config.yaml
new file mode 100644
index 000000000..fa5281758
--- /dev/null
+++ b/environments/abstention/config.yaml
@@ -0,0 +1,57 @@
+# Abstention Environment
+# Trains a policy model to abstain from answering when unsure rather than
+# hallucinating.  Uses a three-tier reward: correct (1.0) > abstain (lambda) >
+# incorrect (0.0).  Verification uses an LLM judge (OMNISCIENCE_GRADER) instead
+# of string matching.
+
+abstention:
+  resources_servers:
+    abstention:
+      entrypoint: app.py
+      domain: rlhf
+      verified: false
+      description: Train models to abstain when unsure using three-tier reward on HotPotQA with LLM judge
+      value: Improve calibration by rewarding abstention over incorrect answers
+
+      # LLM judge model server
+      judge_model_server:
+        type: responses_api_models
+        name: genrm_model
+      judge_responses_create_params:
+        input: []
+        max_output_tokens: 64
+        temperature: 0.0
+        top_p: 1.0
+
+      # Reward for abstaining (lambda). Must satisfy 0 < lambda < 1.
+      abstention_reward: 0.5
+
+      # Token the model outputs inside \boxed{} to signal abstention
+      abstention_token: "[IDK]"
+
+      # Reward for correct and incorrect answers
+      correct_reward: 1.0
+      incorrect_reward: 0.0
+
+abstention_simple_agent:
+  responses_api_agents:
+    simple_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: abstention
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+        - name: hotpotqa_train
+          type: train
+          jsonl_fpath: environments/abstention/data/hotpotqa_train.jsonl
+          license: Creative Commons Attribution-ShareAlike 4.0 International
+        - name: hotpotqa_val
+          type: validation
+          jsonl_fpath: environments/abstention/data/hotpotqa_val.jsonl
+          license: Creative Commons Attribution-ShareAlike 4.0 International
+        - name: example
+          type: example
+          jsonl_fpath: environments/abstention/data/example.jsonl
diff --git a/environments/abstention/data/.gitignore b/environments/abstention/data/.gitignore
new file mode 100644
index 000000000..cf42462ab
--- /dev/null
+++ b/environments/abstention/data/.gitignore
@@ -0,0 +1,6 @@
+*train.jsonl
+*validation.jsonl
+*val.jsonl
+*train_prepare.jsonl
+*validation_prepare.jsonl
+*example_prepare.jsonl
diff --git a/environments/abstention/data/example.jsonl b/environments/abstention/data/example.jsonl
new file mode 100644
index 000000000..5dd7b275d
--- /dev/null
+++ b/environments/abstention/data/example.jsonl
@@ -0,0 +1,5 @@
+{"id": "example_0", "question": "Were Scott Derrickson and Ed Wood of the same nationality?", "answer": "Yes", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Were Scott Derrickson and Ed Wood of the same nationality?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_1", "question": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?", "answer": "United Nations Goodwill Ambassador", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_2", "question": "Which magazine was started first, Arthur's Magazine or First for Women?", "answer": "Arthur's Magazine", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Which magazine was started first, Arthur's Magazine or First for Women?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_3", "question": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?", "answer": "Greenwich Village", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?"}], "tools": [], "parallel_tool_calls": false}}
+{"id": "example_4", "question": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?", "answer": "Ransen Kamangar", "agent_ref": {"type": "responses_api_agents", "name": "abstention_simple_agent"}, "responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?"}], "tools": [], "parallel_tool_calls": false}}
diff --git a/environments/abstention/data/example_metrics.json b/environments/abstention/data/example_metrics.json
new file mode 100644
index 000000000..8e79cdb0e
--- /dev/null
+++ b/environments/abstention/data/example_metrics.json
@@ -0,0 +1,50 @@
+{
+    "name": "example",
+    "type": "example",
+    "jsonl_fpath": "resources_servers/abstention/data/example.jsonl",
+    "num_repeats": 1,
+    "gitlab_identifier": null,
+    "huggingface_identifier": null,
+    "license": null,
+    "Number of examples": 5,
+    "Number of tools": {
+        "Total # non-null values": 5,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Standard deviation": 0.0
+    },
+    "Json-dumped number of words (proxy for token count)": {
+        "Total # non-null values": 5,
+        "Average": 78.2,
+        "Min": 75.0,
+        "Max": 82.0,
+        "Standard deviation": 2.86
+    },
+    "Number of turns": {
+        "Total # non-null values": 5,
+        "Average": 1.0,
+        "Min": 1.0,
+        "Max": 1.0,
+        "Standard deviation": 0.0
+    },
+    "Temperature": {
+        "Total # non-null values": 0,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Standard deviation": 0.0
+    },
+    "id": {
+        "unique_count": 5,
+        "total_count": 5
+    },
+    "question": {
+        "unique_count": 5,
+        "total_count": 5
+    },
+    "answer": {
+        "unique_count": 5,
+        "total_count": 5
+    }
+}
\ No newline at end of file
diff --git a/environments/abstention/data/example_rollouts.jsonl b/environments/abstention/data/example_rollouts.jsonl
new file mode 100644
index 000000000..ccf55a8e5
--- /dev/null
+++ b/environments/abstention/data/example_rollouts.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Were Scott Derrickson and Ed Wood of the same nationality?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_0", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_0", "content": [{"annotations": [], "text": "\\boxed{Yes}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_0", "question": "Were Scott Derrickson and Ed Wood of the same nationality?", "answer": "Yes", "extracted_answer": "Yes", "ground_truth": "Yes", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_1", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_1", "content": [{"annotations": [], "text": "\\boxed{United Nations Goodwill Ambassador}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_1", "question": "What government position was held by the woman who portrayed Vivien Harmon in Murder House?", "answer": "United Nations Goodwill Ambassador", "extracted_answer": "United Nations Goodwill Ambassador", "ground_truth": "United Nations Goodwill Ambassador", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Which magazine was started first, Arthur's Magazine or First for Women?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_2", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_2", "content": [{"annotations": [], "text": "\\boxed{Arthur's Magazine}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_2", "question": "Which magazine was started first, Arthur's Magazine or First for Women?", "answer": "Arthur's Magazine", "extracted_answer": "Arthur's Magazine", "ground_truth": "Arthur's Magazine", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_3", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_3", "content": [{"annotations": [], "text": "\\boxed{Greenwich Village}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 1.0, "id": "example_3", "question": "The director of the romantic comedy \"Big Stone Gap\" is based in what New York City neighborhood?", "answer": "Greenwich Village", "extracted_answer": "Greenwich Village", "ground_truth": "Greenwich Village", "verdict": "correct", "judge_output": "A", "is_correct": 1.0, "is_abstain": 0.0, "is_incorrect": 0.0, "omniscience_index": 1.0}
+{"responses_create_params": {"input": [{"role": "system", "content": "You will be given a question, answer with JUST the answer (no explanation).\nPut your final answer in \\boxed{your answer} format.\nIf you do not know the answer, or you need more context or tools to answer the question,\noutput \\boxed{[IDK]} instead of guessing.\nIt is better to not provide an answer when unsure than to be wrong."}, {"role": "user", "content": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?"}], "tools": [], "parallel_tool_calls": false}, "response": {"id": "resp_example_4", "created_at": 1700000000.0, "error": null, "incomplete_details": null, "instructions": null, "metadata": {}, "model": "synthetic-example", "object": "response", "output": [{"id": "msg_example_4", "content": [{"annotations": [], "text": "\\boxed{[IDK]}", "type": "output_text", "logprobs": []}], "role": "assistant", "status": "completed", "type": "message"}], "parallel_tool_calls": false, "temperature": 1.0, "tool_choice": "auto", "tools": [], "top_p": 1.0, "background": null, "max_output_tokens": null, "max_tool_calls": null, "previous_response_id": null, "prompt": null, "reasoning": null, "service_tier": null, "status": "completed", "text": null, "top_logprobs": null, "truncation": null, "usage": {"input_tokens": 100, "input_tokens_details": {"cached_tokens": 0}, "output_tokens": 20, "output_tokens_details": {"reasoning_tokens": 0}, "total_tokens": 120}, "user": null}, "reward": 0.5, "id": "example_4", "question": "Who was known by his stage name combiz Ransen, an Iranian-Canadian film director?", "answer": "Ransen Kamangar", "extracted_answer": "[IDK]", "ground_truth": "Ransen Kamangar", "verdict": "abstain", "judge_output": null, "is_correct": 0.0, "is_abstain": 1.0, "is_incorrect": 0.0, "omniscience_index": 0.0}
diff --git a/environments/abstention/prepare.py b/environments/abstention/prepare.py
new file mode 100644
index 000000000..0221ee442
--- /dev/null
+++ b/environments/abstention/prepare.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+"""
+Preprocess HotPotQA into JSONL for the abstention environment.
+
+Downloads the HotPotQA dataset from HuggingFace if --download is passed,
+then converts the raw data into the JSONL format expected by the
+abstention resources server.  This version:
+
+  - Includes the ground-truth ``answer`` field in every record (needed by
+    the LLM judge).
+  - Injects a system prompt instructing the model to use \\boxed{} format
+    and \\boxed{[IDK]} when unsure.
+
+Usage:
+    # Download and preprocess in one step
+    python dataset_preprocess.py \\
+        --download \\
+        --raw-data-dir /path/to/data/hotpotqa \\
+        --output-dir ./data
+
+    # Preprocess only (data already downloaded)
+    python dataset_preprocess.py \\
+        --raw-data-dir /path/to/data/hotpotqa \\
+        --output-dir ./data
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from pathlib import Path
+
+
+SYSTEM_PROMPT = (
+    "You will be given a question, answer with JUST the answer (no explanation).\n"
+    "Put your final answer in \\boxed{your answer} format.\n"
+    "If you do not know the answer, or you need more context or tools to answer the question,\n"
+    "output \\boxed{[IDK]} instead of guessing.\n"
+    "It is better to not provide an answer when unsure than to be wrong."
+)
+
+
+def download_hotpotqa(raw_data_dir: str, splits: list[str] | None = None) -> None:
+    """Download HotPotQA fullwiki from HuggingFace and save as JSONL."""
+    from datasets import load_dataset
+
+    os.makedirs(raw_data_dir, exist_ok=True)
+    splits = splits or ["train", "validation"]
+
+    for split in splits:
+        out_path = os.path.join(raw_data_dir, f"fullwiki_{split}.jsonl")
+        if os.path.exists(out_path):
+            print(f"  {out_path} already exists, skipping download.")
+            continue
+        print(f"  Downloading HotPotQA fullwiki {split}...")
+        ds = load_dataset("hotpotqa/hotpot_qa", "fullwiki", split=split, cache_dir=raw_data_dir)
+        ds.to_json(out_path)
+        print(f"  Saved {len(ds)} records to {out_path}")
+
+
+def build_record(row: dict, idx: int) -> dict:
+    """Convert a single HotPotQA row into an abstention record."""
+    question = row["question"]
+    answer = row["answer"]
+    record_id = row.get("id", idx)
+
+    return {
+        "id": record_id,
+        "question": question,
+        "answer": answer,
+        "agent_ref": {
+            "type": "responses_api_agents",
+            "name": "abstention_simple_agent",
+        },
+        "responses_create_params": {
+            "input": [
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": question},
+            ],
+            "tools": [],
+            "parallel_tool_calls": False,
+        },
+    }
+
+
+def preprocess_split(
+    raw_jsonl_path: Path,
+    output_path: Path,
+    max_samples: int | None = None,
+) -> int:
+    """Read a raw HotPotQA JSONL and write the preprocessed version."""
+    count = 0
+    with open(raw_jsonl_path, "r", encoding="utf-8") as fin, open(output_path, "w", encoding="utf-8") as fout:
+        for idx, line in enumerate(fin):
+            if max_samples is not None and count >= max_samples:
+                break
+            row = json.loads(line)
+            record = build_record(row, idx)
+            fout.write(json.dumps(record, ensure_ascii=False) + "\n")
+            count += 1
+    return count
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Preprocess HotPotQA for the abstention environment")
+    parser.add_argument(
+        "--download",
+        action="store_true",
+        help="Download the HotPotQA dataset from HuggingFace first.",
+    )
+    parser.add_argument(
+        "--raw-data-dir",
+        type=Path,
+        required=True,
+        help="Directory containing (or to download to) raw HotPotQA JSONL files.",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path(__file__).parent / "data",
+        help="Output directory for preprocessed JSONL files (default: ./data).",
+    )
+    parser.add_argument(
+        "--max-train-samples",
+        type=int,
+        default=None,
+        help="Limit the number of training samples (default: use all).",
+    )
+    parser.add_argument(
+        "--max-val-samples",
+        type=int,
+        default=None,
+        help="Limit the number of validation samples (default: use all).",
+    )
+    args = parser.parse_args()
+
+    if args.download:
+        print("Step 1: Downloading HotPotQA from HuggingFace...")
+        download_hotpotqa(str(args.raw_data_dir))
+    else:
+        print("Skipping download (use --download to fetch from HuggingFace).")
+
+    args.output_dir.mkdir(parents=True, exist_ok=True)
+
+    splits = [
+        ("fullwiki_train.jsonl", "hotpotqa_train.jsonl", args.max_train_samples),
+        ("fullwiki_validation.jsonl", "hotpotqa_val.jsonl", args.max_val_samples),
+    ]
+
+    for raw_name, out_name, max_samples in splits:
+        raw_path = args.raw_data_dir / raw_name
+        out_path = args.output_dir / out_name
+        if not raw_path.exists():
+            print(f"Warning: {raw_path} not found, skipping.")
+            continue
+        print(f"Preprocessing {raw_path} -> {out_path}...")
+        count = preprocess_split(raw_path, out_path, max_samples)
+        print(f"  Wrote {count} records to {out_path}")
+
+    print("\nDone.")
+
+
+if __name__ == "__main__":
+    main()