diff --git a/src/programbench/cli/main.py b/src/programbench/cli/main.py index 6a36792..1b8d94d 100644 --- a/src/programbench/cli/main.py +++ b/src/programbench/cli/main.py @@ -54,7 +54,14 @@ def eval( filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex"), slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g. '0:5')"), summarize_only: bool = typer.Option(False, "--summarize-only", help="Skip evaluation; just read existing results"), - image_tag: str = typer.Option("task", "--image-tag", help="Docker image tag to evaluate"), + image_tag: str = typer.Option( + "task_cleanroom", + "--image-tag", + help="Docker image tag to evaluate submissions in. Defaults to the " + "artifact-free cleanroom image so submissions can't rely on build " + "artifacts leaked into the full :task build environment. Pass " + "--image-tag task to use the full build environment instead.", + ), output: str = typer.Option( "", "-o", diff --git a/src/programbench/eval/eval.py b/src/programbench/eval/eval.py index b423f3d..1d73a91 100644 --- a/src/programbench/eval/eval.py +++ b/src/programbench/eval/eval.py @@ -289,7 +289,7 @@ def __init__( submission_archive: Path | None = None, blob_dir: Path | None = None, remove_hashes: list[str] | None = None, - image_tag: str = "task", + image_tag: str = "task_cleanroom", from_existing: EvaluationResult | None = None, instance_id: str = "", docker_cpus: int = DOCKER_CPUS, diff --git a/src/programbench/eval/eval_batch.py b/src/programbench/eval/eval_batch.py index 8faa0a3..0a3ae12 100644 --- a/src/programbench/eval/eval_batch.py +++ b/src/programbench/eval/eval_batch.py @@ -229,7 +229,7 @@ def _evaluate_instance( source_dir: Path, target_dir: Path, force: bool, - image_tag: str = "task", + image_tag: str = "task_cleanroom", docker_cpus: int = DOCKER_CPUS, branch_workers: int = 1, branch_retries: int = 1, @@ -374,7 +374,7 @@ def run_eval_batch( branch_workers: int = 1, docker_cpus: int = DOCKER_CPUS, summarize_only: bool = False, - image_tag: str = "task", + image_tag: str = "task_cleanroom", output: str | Path = "", branch_retries: int = 1, ) -> None: