facebookresearch · klieret · Jun 18, 2026 · Jun 18, 2026
diff --git a/docs/README.md b/docs/README.md
@@ -7,11 +7,11 @@
 
 ## Inference
 
-Please use the images with tag `task_cleanroom` from `https://hub.docker.com/orgs/programbench/repositories`.
+Please use the images with tag `task_cleanroom_v6` from `https://hub.docker.com/orgs/programbench/repositories`.
 E.g., to solve the task `ffmpeg__ffmpeg.360a402`, use the following image:
 
 ```
-https://hub.docker.com/repository/docker/programbench/ffmpeg_1776_ffmpeg.360a402/tags/task_cleanroom/
+https://hub.docker.com/repository/docker/programbench/ffmpeg_1776_ffmpeg.360a402/tags/task_cleanroom_v6/
 ```
 
 (the `__` is replaced by `_1776_`).
@@ -49,7 +49,7 @@ After following the installation instructions from the [README](../README.md#qui
 uv run programbench eval /path/to/my-amazing-agent-run
 ```
 
-The evaluation will automatically pull all required docker containers (e.g., `ffmpeg_1776_ffmpeg.360a402:task`).
+The evaluation will automatically pull all required Docker images (e.g., `ffmpeg_1776_ffmpeg.360a402:task_cleanroom_v6` with the default `--image-tag`).
 
 > [!TIP]
 > Test blobs (per-branch test archives) are downloaded on demand from HuggingFace during evaluation.

diff --git a/src/programbench/cli/main.py b/src/programbench/cli/main.py
@@ -55,12 +55,12 @@ def eval(
     slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g. '0:5')"),
     summarize_only: bool = typer.Option(False, "--summarize-only", help="Skip evaluation; just read existing results"),
     image_tag: str = typer.Option(
-        "task_cleanroom",
+        "task_cleanroom_v6",
         "--image-tag",
         help="Docker image tag to evaluate submissions in. Defaults to the "
         "artifact-free cleanroom image so submissions can't rely on build "
         "artifacts leaked into the full :task build environment. Pass "
-        "--image-tag task to use the full build environment instead.",
+        "--image-tag task_v6 to use the full build environment instead.",
     ),
     output: str = typer.Option(
         "",

diff --git a/src/programbench/eval/eval.py b/src/programbench/eval/eval.py
@@ -290,7 +290,7 @@ def __init__(
         submission_archive: Path | None = None,
         blob_dir: Path | None = None,
         remove_hashes: list[str] | None = None,
-        image_tag: str = "task_cleanroom",
+        image_tag: str = "task_cleanroom_v6",
         from_existing: EvaluationResult | None = None,
         instance_id: str = "",
         docker_cpus: int = DOCKER_CPUS,

diff --git a/src/programbench/eval/eval_batch.py b/src/programbench/eval/eval_batch.py
@@ -229,7 +229,7 @@ def _evaluate_instance(
     source_dir: Path,
     target_dir: Path,
     force: bool,
-    image_tag: str = "task_cleanroom",
+    image_tag: str = "task_cleanroom_v6",
     docker_cpus: int = DOCKER_CPUS,
     branch_workers: int = 1,
     branch_retries: int = 1,
@@ -374,7 +374,7 @@ def run_eval_batch(
     branch_workers: int = 1,
     docker_cpus: int = DOCKER_CPUS,
     summarize_only: bool = False,
-    image_tag: str = "task_cleanroom",
+    image_tag: str = "task_cleanroom_v6",
     output: str | Path = "",
     branch_retries: int = 1,
 ) -> None: