diff --git a/src/programbench/cli/main.py b/src/programbench/cli/main.py
index 6a36792..85f20b9 100644
--- a/src/programbench/cli/main.py
+++ b/src/programbench/cli/main.py
@@ -9,6 +9,7 @@
 import typer
 
 from programbench.cli.blob import app as blob_app
+from programbench.cli.submit import app as submit_app
 from programbench.constants import DOCKER_CPUS
 
 app = typer.Typer(
@@ -18,6 +19,7 @@
     context_settings={"help_option_names": ["-h", "--help"]},
 )
 app.add_typer(blob_app, name="blob")
+app.add_typer(submit_app, name="submit")
 
 
 @app.callback()
diff --git a/src/programbench/cli/submit.py b/src/programbench/cli/submit.py
new file mode 100644
index 0000000..4681a51
--- /dev/null
+++ b/src/programbench/cli/submit.py
@@ -0,0 +1,268 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Submission lifecycle commands: package an eval run, publish its repo, verify a submission,
+register it on the leaderboard, and recombine a split eval.json."""
+
+import shutil
+from pathlib import Path
+
+import typer
+
+app = typer.Typer(no_args_is_help=True, help="Prepare, check, and reassemble leaderboard submissions.")
+
+
+@app.command()
+def package(
+    run_dir: Path = typer.Argument(
+        ..., help="A `programbench eval` run directory (<run_dir>/<iid>/submission.tar.gz)."
+    ),
+    upload_to: str = typer.Option(
+        "",
+        "--upload-to",
+        metavar="ORG[/DATASET]",
+        help="Upload submission.tar.gz and the heavy eval.log.json to a HuggingFace dataset, "
+        "replacing each with a .url + .sha256. A bare org (e.g. 'programbench') creates a "
+        "per-submission dataset org/<run-dir-name>; pass 'org/name' to use an exact dataset.",
+    ),
+    overwrite: bool = typer.Option(
+        False, "--overwrite", help="With --upload-to, re-upload files already present on HF (default: skip them)."
+    ),
+) -> None:
+    """Turn an evaluated run directory into a leaderboard submission, in place.
+
+    Writes a submission.yaml manifest and _stats/score.json, and splits each large
+    eval.json into a light eval.json (kept) + a heavy <iid>.eval.log.json (raw log +
+    failure text) so the repo stays git-pushable. With --upload-to, the heavy files and
+    the submission.tar.gz artifacts are uploaded to HuggingFace. System metadata and
+    trajectories are left as TODO.
+
+    \b
+    Examples:
+        programbench submit package output/my-run
+        programbench submit package output/my-run --upload-to programbench
+    """
+    from rich.console import Console
+
+    from programbench.package import package_run
+
+    outcome = package_run(run_dir, upload_to=upload_to or None, overwrite=overwrite)
+    emit = Console().print
+    emit(
+        f"Packaged [bold]{len(outcome.packaged)}[/bold] instance(s) in [bold]{outcome.run_dir}[/bold] "
+        f"(skipped {len(outcome.skipped)} unknown). "
+        f"mean_score={outcome.headline.mean_score * 100:.1f} resolved={outcome.headline.resolved_pct:.1f}%"
+    )
+    emit(
+        "[dim]Each eval.json was split into eval.json + <iid>.eval.log.json (recombine with "
+        "`programbench submit recombine`). Next: fill in submission.yaml + add traj.json files.[/dim]"
+    )
+
+
+@app.command()
+def verify(
+    submission_dir: Path = typer.Argument(..., help="A packaged submission directory (contains submission.yaml)."),
+    tier1: bool = typer.Option(
+        False, "--tier1", help="Also re-run `programbench eval` and check artifacts reproduce the results (Docker)."
+    ),
+    workers: int = typer.Option(1, "-w", "--workers", help="Instance workers for the Tier-1 re-eval."),
+    filter_spec: str = typer.Option(
+        "", "--filter", help="Restrict Tier-1 re-eval to instance IDs matching this regex."
+    ),
+) -> None:
+    """Verify a submission against its own artifacts.
+
+    Tier 0 (default, no Docker) recomputes each instance's per-test pass/fail from its
+    eval.json and checks it matches _stats/score.json. Tier 1 (--tier1) additionally
+    resolves each submission.tar.gz and re-runs evaluation to confirm the artifacts
+    reproduce the reported scores.
+
+    \b
+    Examples:
+        programbench submit verify ./their-submission
+        programbench submit verify ./their-submission --tier1 -w 4
+    """
+    from rich.console import Console
+    from rich.table import Table
+
+    from programbench.verify import verify_tier0, verify_tier1
+
+    # Tier 1 re-runs the evaluation; Tier 0 only reads what is already in the submission.
+    if tier1:
+        report = verify_tier1(submission_dir, workers=workers, filter_spec=filter_spec)
+    else:
+        report = verify_tier0(submission_dir)
+
+    out = Console()
+    total = len(report.checks)
+    mismatches = [check for check in report.checks if not check.ok]
+    out.print(f"Tier-{report.tier}: [bold]{total - len(mismatches)}/{total}[/bold] checks consistent")
+    # The table is only worth drawing when something disagrees.
+    if mismatches:
+        table = Table(title="Discrepancies", box=None)
+        table.add_column("Instance", style="bold")
+        for heading in ("score.json", "recomputed"):
+            table.add_column(heading, justify="right")
+        for check in mismatches:
+            table.add_row(check.name, str(check.claimed), str(check.computed))
+        out.print(table)
+    # Exit non-zero on failure so scripts can gate on the verdict.
+    if not report.ok:
+        out.print("[bold red]FAIL[/bold red] — discrepancies found above.")
+        raise typer.Exit(1)
+    out.print("[bold green]PASS[/bold green] — submission is consistent with its artifacts.")
+
+
+@app.command()
+def publish(
+    run_dir: Path = typer.Argument(..., help="A packaged submission directory (contains submission.yaml)."),
+    owner: str = typer.Option(
+        "", "--owner", help="GitHub org/user to create the repo under (default: your gh account)."
+    ),
+    repo: str = typer.Option("", "--repo", help="Repository name (default: the submission directory name)."),
+    private: bool = typer.Option(
+        False, "--private", help="Create the repo private (it must be public before you can register it)."
+    ),
+    remote: str = typer.Option(
+        "", "--remote", help="Push to this existing empty repo URL instead of creating one (the no-gh path)."
+    ),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Show what would be created/pushed; touch no network and make no commit."
+    ),
+) -> None:
+    """Create this submission's public GitHub repo and push it (package -> publish -> register).
+
+    Heavy artifacts already live on HuggingFace (as .url + .sha256 from `package`), so only
+    light files are committed. With `gh` the repo is created and pushed in one shot; without
+    it, pass `--remote <url>` to an empty repo you created, or follow the printed steps. The
+    repo name defaults to the directory name and the URL is read back by `register`, so it is
+    never stored in submission.yaml.
+
+    \b
+    Examples:
+        programbench submit publish ./my-run --dry-run
+        programbench submit publish ./my-run --owner my-org
+    """
+    from rich.console import Console
+
+    from programbench.publish import _origin, publish as do_publish
+
+    console = Console()
+    name = repo or run_dir.resolve().name
+    # The preview follows publish()'s own target order: --remote, then an existing origin, then gh.
+    if dry_run:
+        existing = _origin(run_dir)
+        if remote:
+            plan = f"{'re-point origin to' if existing else 'add remote'} [bold]{remote}[/bold] and push"
+        elif existing:
+            plan = f"push current commit to existing remote [bold]{existing}[/bold]"
+        elif shutil.which("gh"):
+            plan = f"`gh repo create` [bold]{f'{owner}/{name}' if owner else name}[/bold] ({'private' if private else 'public'}), set origin, and push"
+        else:
+            plan = f"commit locally only — no gh and no --remote, so the repo for [bold]{name}[/bold] can't be created"
+        console.print(f"[bold]Would publish[/bold] {run_dir}:\n  {plan}")
+        console.print("[dim]Dry run — no commit, nothing created or pushed. Drop --dry-run to publish.[/dim]")
+        return
+
+    result = do_publish(run_dir, owner=owner, repo=repo, private=private, remote=remote)
+    if result.repo_url:
+        console.print(f"[bold green]Published[/bold green] {name} -> {result.repo_url}")
+        console.print("[dim]Next: `programbench submit register .` to register it on the leaderboard.[/dim]")
+    else:
+        console.print(f"[bold]Committed[/bold] {name} locally.\n{result.next_steps}")
+
+
+@app.command()
+def register(
+    submission_dir: Path = typer.Argument(..., help="A packaged submission directory (contains submission.yaml)."),
+    registry: str = typer.Option(
+        "", "--registry", help="Registry repo to PR against (default: ProgramBench/submissions)."
+    ),
+    source: str = typer.Option(
+        "", "--source", help="Public URL of this submission's repo (default: autodetected from its git remote)."
+    ),
+    commit: str = typer.Option(
+        "", "--commit", help="Commit SHA that was scored (default: autodetected from its git HEAD)."
+    ),
+    dry_run: bool = typer.Option(
+        False, "--dry-run", help="Build the registry entry locally and print the plan; touch no network."
+    ),
+    verify: bool = typer.Option(
+        True, "--verify/--no-verify", help="Run a Tier-0 verify gate before registering (default: on)."
+    ),
+) -> None:
+    """Register a packaged submission on the leaderboard by opening a PR to the registry.
+
+    The PR adds a small submissions/<id>/ entry: a pointer.yaml (the submission repo URL +
+    the exact commit scored) plus the submission.yaml and _stats/ copied from this run. The
+    source URL and commit are read from the run directory's own git remote/HEAD. With `gh`
+    installed the registry is forked and the PR opened for you; otherwise the entry is left
+    committed on a branch and the steps to push + open the PR are printed.
+
+    \b
+    Examples:
+        programbench submit register ./my-run --dry-run
+        programbench submit register ./my-run
+    """
+    import tempfile
+
+    from rich.console import Console
+
+    from programbench.register import REGISTRY_DEFAULT, build_plan, register_submission, write_entry
+
+    out = Console()
+    registry = registry or REGISTRY_DEFAULT
+    source_url, commit_sha = source or None, commit or None  # "" means autodetect -> passed on as None
+    if verify:
+        from programbench.verify import verify_tier0
+
+        if not verify_tier0(submission_dir).ok:
+            out.print(
+                "[bold red]FAIL[/bold red] — Tier-0 verification failed; fix the submission (or pass "
+                "--no-verify) before registering. Run `programbench submit verify .` to see the mismatch."
+            )
+            raise typer.Exit(1)
+
+    if not dry_run:
+        done = register_submission(submission_dir, registry, source_url, commit_sha)
+        if done.pr_url:
+            out.print(f"[bold green]Opened PR[/bold green] for {done.plan.submission_id}: {done.pr_url}")
+        else:
+            out.print(f"[bold]Prepared[/bold] registry entry for {done.plan.submission_id}.\n{done.next_steps}")
+        return
+
+    plan = build_plan(submission_dir, registry, source_url, commit_sha)
+    with tempfile.TemporaryDirectory() as scratch:
+        entry = write_entry(plan, submission_dir, Path(scratch))
+        listed = sorted(str(path.relative_to(entry)) for path in entry.rglob("*") if path.is_file())
+    out.print(f"[bold]Would register[/bold] [cyan]{plan.submission_id}[/cyan] to {plan.registry}")
+    out.print(f"  branch: {plan.branch}")
+    out.print(f"  source: {plan.source}\n  commit: {plan.commit}")
+    out.print("  files:  " + ", ".join(f"submissions/{plan.submission_id}/{rel}" for rel in listed))
+    out.print(f"\n[dim]pointer.yaml:[/dim]\n{plan.pointer.rstrip()}")
+    out.print(f"\n[dim]PR title:[/dim] {plan.title}\n[dim]PR body:[/dim]\n{plan.body}")
+    out.print("\n[dim]Dry run — nothing cloned, pushed, or opened. Drop --dry-run to register.[/dim]")
+
+
+@app.command()
+def recombine(
+    run_dir: Path = typer.Argument(..., help="A packaged run/submission directory."),
+) -> None:
+    """Reverse `package`'s eval split: fold each <iid>.eval.log.json back into its
+    eval.json, restoring the original full eval output.
+
+    The heavy file is read locally, or downloaded from its .url if it was uploaded to HF.
+
+    \b
+    Examples:
+        programbench submit recombine ./their-submission
+    """
+    from rich.console import Console
+
+    from programbench.submission import recombine_eval_json
+
+    total = sum(recombine_eval_json(sub, sub.name) for sub in sorted(filter(Path.is_dir, run_dir.iterdir())))
+    Console().print(f"Recombined [bold]{total}[/bold] eval.json file(s) in {run_dir}")
diff --git a/src/programbench/data/templates/README.md.j2 b/src/programbench/data/templates/README.md.j2
new file mode 100644
index 0000000..9e6d1bb
--- /dev/null
+++ b/src/programbench/data/templates/README.md.j2
@@ -0,0 +1,78 @@
+<p align="center">
+  <a href="https://programbench.com"><img src="https://programbench.com/static/images/fox_hero_200.png" width="110" alt="ProgramBench"></a>
+</p>
+
+> A submission to the **[ProgramBench](https://programbench.com)** leaderboard — *can language models rebuild programs from scratch?*  ·  [Leaderboard](https://programbench.com)  ·  [How to submit](https://programbench.com/blog/submission-guide)
+
+# [Submission Name Here]
+
+<!-- Manifest, scores, and per-test results live in `submission.yaml` and `_stats/`. This file
+is for the things the manifest can't capture — please fill in the sections below. -->
+
+## System overview
+
+<!-- One or two paragraphs: what is your system and how does it work end to end? Cover
+     the model (exact id/version and key settings like temperature / reasoning effort),
+     the agent/scaffold (framework + version, prompting, tools, step limits), and your
+     test-time strategy (single attempt, best-of-N, iterative test/fix, ...). -->
+
+## Reproducing this run
+
+<!-- The exact commands to reproduce this submission, ideally runnable as-is. -->
+
+```bash
+# 1. install the agent / dependencies
+# 2. run inference per task (no internet, per the eval protocol)
+# 3. programbench eval <run-dir>
+# 4. programbench submit package <run-dir> --upload-to <org>
+```
+
+## Extra stats (optional)
+
+The leaderboard can show stats beyond `score` — e.g. cost or model calls. These are
+**optional**, and each must be **computed by a script that reads your trajectories**, not
+entered by hand: the number has to be recoverable from the run. `programbench` ships no
+calculators (it makes no assumptions about your scaffold) — write your own that reads each
+`traj.json` and emits a flat `{instance_id: value}` map to `_stats/<name>.json`, and ship
+the script here (e.g. under `_scripts/`) so the numbers are reproducible.
+
+## Links
+
+<!-- Optional: agent/scaffold code, model card, paper, blog post. -->
+
+## Submission checklist
+
+- [ ] Ran `programbench eval` → `programbench submit package` to produce this submission
+- [ ] Filled in every `submission.yaml` field (no `TODO` left), including `is_os_model` / `is_os_scaffold`
+- [ ] Trajectories (`traj.json`) included for every task (agent submissions)
+- [ ] Solutions present — inline `submission.tar.gz`, or a hosted `submission.tar.gz.url` + `.sha256`
+- [ ] Any extra stats (cost/calls) were produced by a trajectory-reading script shipped here, not hand-written
+- [ ] Filled in the System overview and Reproducing sections above
+- [ ] `programbench submit verify .` passes
+- [ ] Made this repo public
+- [ ] Opened a registration PR to the submissions repo
+
+## Integrity attestations
+
+- [ ] Solutions were produced **only** from behavioral observation of the binary and its
+      bundled docs — no source code, repositories, mirrors, or package registries were consulted
+- [ ] The model was not given internet access during evaluation
+- [ ] The model did not have access to any unit tests during evaluation
+- [ ] I consent to re-evaluation, and to flagging or removal if it contradicts the reported results
+
+## Auditing
+
+Anyone can independently check this submission with the following instructions:
+
+```bash
+git clone <your-submission-repo>
+cd {{ submission_id }}
+uvx programbench submit verify .          # Tier-0: recompute the score from this repo's eval.json and check it matches _stats/score.json (instant, offline)
+uvx programbench submit verify . --tier1  # Tier-1: download each submission.tar.gz from HuggingFace, re-run evaluation, and confirm it reproduces the score (Docker)
+```
+
+* Tier-0 is self-contained. It reads the per-instance `eval.json` here plus the bundled test
+metadata.
+* Tier-1 additionally fetches the hosted solutions and the hidden tests and re-runs
+them, so the reported `score` is reproduced from scratch. (Cost/calls are self-reported from
+the trajectories; only `score` is independently re-verifiable.)
diff --git a/src/programbench/data/templates/submission.yaml.j2 b/src/programbench/data/templates/submission.yaml.j2
new file mode 100644
index 0000000..1539bb6
--- /dev/null
+++ b/src/programbench/data/templates/submission.yaml.j2
@@ -0,0 +1,23 @@
+# Generated by `programbench submit package` from: {{ run_dir }}
+# [auto] fields are recomputed on every `programbench submit package`; all other fields are preserved.
+schema_version: 1
+
+submission_id: {{ submission_id | tojson }}   # [auto] the run directory's name
+submitter:
+  name: {{ submitter_name | tojson }}
+  contact: {{ submitter_contact | tojson }}    # email or @github
+  affiliation: {{ affiliation | tojson }}
+
+system:
+  agent: {{ agent | tojson }}                   # scaffold/harness; "none" for a pure human submission
+  description_url: {{ description_url | tojson }}
+  is_os_model: {{ is_os_model | tojson }}       # true if the model's weights are openly available
+  is_os_scaffold: {{ is_os_scaffold | tojson }} # true if the agent/scaffold is open source
+  model: {{ model | tojson }}                   # display name used on the leaderboard
+  provider: {{ provider | tojson }}
+  type: {{ system_type | tojson }}              # single-agent | multi-agent | other
+
+eval:
+  programbench_version: {{ programbench_version | tojson }}   # [auto]
+# Scores are not stored here: the leaderboard recomputes them from _stats/score.json with the
+# registry's current ignored-tests list, so any cached numbers would just go stale.
diff --git a/src/programbench/package.py b/src/programbench/package.py
new file mode 100644
index 0000000..13dc7bf
--- /dev/null
+++ b/src/programbench/package.py
@@ -0,0 +1,212 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Turn a ``programbench eval`` run directory into a leaderboard submission, in place.
+
+Packaging is purely eval-derived. It writes:
+
+- ``_stats/score.json`` — per-instance, per-test pass/fail (the one stat from evaluation),
+- ``submission.yaml`` — the manifest, with its ``[auto]`` fields recomputed and any
+  author-entered fields preserved across re-packaging (scores are not stored in it),
+
+and splits each ``<iid>.eval.json`` into a light eval.json + a heavy ``<iid>.eval.log.json``
+(the raw log + failure text) so the run repo stays git-pushable; the two recombine to the
+original via ``programbench submit recombine``. With ``--upload-to`` the heavy files and the
+``submission.tar.gz`` artifacts go to a HuggingFace dataset (replaced by ``.url`` + ``.sha256``).
+
+Other stats (cost, calls, …) are optional and come from the agent trajectories via scripts
+the submitter writes — this command produces none of them, and makes no assumptions about
+the scaffold. The run directory stays a valid input to ``programbench eval``.
+"""
+
+import logging
+import os
+import shutil
+import tempfile
+from dataclasses import dataclass
+from importlib.metadata import version
+from pathlib import Path
+
+import yaml
+from jinja2 import Environment, PackageLoader
+
+from programbench.submission import (
+    Headline,
+    aggregate,
+    benchmark_instances,
+    score_from_tests,
+    sha256_file,
+    split_eval_json,
+    test_results_map,
+    write_stat,
+)
+
+log = logging.getLogger(__name__)
+
+TODO = "TODO"
+
+# Author-entered manifest fields preserved across re-packaging: template var -> (path, default).
+_CARRIED = {
+    "affiliation": ("submitter.affiliation", ""),
+    "agent": ("system.agent", TODO),
+    "description_url": ("system.description_url", "README.md"),
+    "is_os_model": ("system.is_os_model", False),
+    "is_os_scaffold": ("system.is_os_scaffold", False),
+    "model": ("system.model", TODO),
+    "provider": ("system.provider", TODO),
+    "submitter_contact": ("submitter.contact", TODO),
+    "submitter_name": ("submitter.name", TODO),
+    "system_type": ("system.type", "single-agent"),
+}
+
+
+@dataclass
+class PackageResult:
+    run_dir: Path  # the run directory, as given to package_run
+    packaged: list[str]  # instance ids that were packaged
+    skipped: list[str]  # instance ids skipped as not known ProgramBench instances
+    headline: Headline  # aggregate(...) over the packaged instances' scores
+
+
+def _dig(d: dict, dotted: str):
+    """Walk ``dotted`` ("a.b.c") through nested dicts; None once a step is missing or not a dict."""
+    node = d
+    for part in dotted.split("."):
+        node = node.get(part) if isinstance(node, dict) else None
+    return node
+
+
+def _carried_values(run_dir: Path) -> dict:
+    """Author-entered manifest values read from an existing submission.yaml, else each field's default."""
+    manifest = run_dir / "submission.yaml"
+    previous = yaml.safe_load(manifest.read_text()) if manifest.exists() else {}
+    found = {var: _dig(previous, path) for var, (path, _) in _CARRIED.items()}
+    # Only None falls back to the default, so a real False/empty value survives re-packaging.
+    return {var: default if found[var] is None else found[var] for var, (_, default) in _CARRIED.items()}
+
+
+def _upload_artifacts(
+    api, dataset: str, pending: list[tuple[Path, str, str]], existing: set[str], overwrite: bool
+) -> None:
+    """Push pending artifacts to a HuggingFace dataset, leaving .url + .sha256 pointers behind.
+
+    Each ``pending`` entry is (instance_dir, instance_id, filename). Every file is hashed
+    first; those not listed in ``existing`` (or all of them with ``overwrite``) are staged
+    as ``<iid>/<filename>`` in a temporary directory beside the instance directories — hard
+    links where possible, a copy otherwise — and sent with ``upload_large_folder``. Finally
+    each local file is replaced by a ``.url`` pointing into the dataset.
+    """
+    for folder, _, name in pending:
+        digest = sha256_file(folder / name)
+        (folder / f"{name}.sha256").write_text(digest + "\n")
+    queue = [item for item in pending if overwrite or f"{item[1]}/{item[2]}" not in existing]
+    if queue:
+        with tempfile.TemporaryDirectory(dir=pending[0][0].parent) as scratch:
+            staging = Path(scratch)
+            for folder, iid, name in queue:
+                staged = staging / iid / name
+                staged.parent.mkdir(parents=True, exist_ok=True)
+                try:
+                    os.link(folder / name, staged)  # same filesystem: no bytes copied
+                except OSError:
+                    shutil.copy2(folder / name, staged)
+            log.info("Uploading %d file(s) to %s (resumable)", len(queue), dataset)
+            api.upload_large_folder(repo_id=dataset, folder_path=str(staging), repo_type="dataset")
+    # Only once the upload call has returned are the local copies swapped for pointers.
+    base = f"https://huggingface.co/datasets/{dataset}/resolve/main"
+    for folder, iid, name in pending:
+        (folder / f"{name}.url").write_text(f"{base}/{iid}/{name}\n")
+        (folder / name).unlink()
+
+
+def package_run(run_dir: Path, upload_to: str | None = None, overwrite: bool = False) -> PackageResult:
+    """Package an evaluated run directory in place and summarise what was done.
+
+    Writes the per-test ``score`` stat, re-renders ``submission.yaml``, creates ``README.md``
+    when absent, and splits each packaged instance's eval.json. ``upload_to`` ("org" or
+    "org/name") also moves the heavy files to that HuggingFace dataset; ``overwrite``
+    re-uploads files already there. Raises ``ValueError`` when nothing is packageable.
+    """
+    instances = benchmark_instances()
+    run_name = run_dir.resolve().name
+
+    api = dataset = None
+    existing: set[str] = set()
+    if upload_to:
+        # Bare "org" -> its own dataset "org/<run-name>"; an explicit "org/name" is used as-is.
+        dataset = upload_to if "/" in upload_to else f"{upload_to}/{run_name}"
+        from huggingface_hub import HfApi
+
+        api = HfApi()
+        api.create_repo(dataset, repo_type="dataset", exist_ok=True)
+        # Force public so `verify`/`recombine` can fetch the artifacts anonymously
+        # (orgs may default new datasets to private).
+        api.update_repo_settings(dataset, repo_type="dataset", private=False)
+        existing = set(api.list_repo_files(dataset, repo_type="dataset"))
+
+    test_maps: dict[str, dict[str, bool]] = {}
+    packaged: list[str] = []
+    skipped: list[str] = []
+    pending: list[tuple[Path, str, str]] = []
+    for instance_dir in sorted(d for d in run_dir.iterdir() if d.is_dir()):
+        iid = instance_dir.name
+        eval_json = instance_dir / f"{iid}.eval.json"
+        # Any artifact form resolve_submission_tar understands counts as a solution.
+        has_solution = any(
+            (instance_dir / f).exists() for f in ("submission.tar.gz", "submission.tar.gz.url", "submission.ref.yaml")
+        )
+        if not (eval_json.exists() and has_solution):
+            continue
+        if iid not in instances:
+            log.warning("Skipping %s (not a known ProgramBench instance)", iid)
+            skipped.append(iid)
+            continue
+        test_maps[iid] = test_results_map(eval_json, instances[iid])
+        # Split the (potentially huge) eval.json into a light eval.json + a heavy
+        # <iid>.eval.log.json (log + failure text); they recombine to the original.
+        split_eval_json(instance_dir, iid)
+        if api:
+            for fname in (f"{iid}.eval.log.json", "submission.tar.gz"):
+                if (instance_dir / fname).exists():
+                    pending.append((instance_dir, iid, fname))
+        packaged.append(iid)
+
+    if not packaged:
+        raise ValueError(f"No packageable instances found under {run_dir}")
+
+    # Write the scoring-derived artifacts first; they don't depend on the upload, so a
+    # failed/throttled upload leaves them correct and the run simply resumable.
+    # score.json is per-test ({iid: {test: passed}}) so scores can be recomputed later
+    # while striking out specific tests; the returned headline has no tests struck.
+    write_stat(run_dir, "score", test_maps)
+    scores = {iid: score_from_tests(m) for iid, m in test_maps.items()}
+    headline = aggregate(scores, len(instances))
+
+    carried = _carried_values(run_dir)
+    env = Environment(loader=PackageLoader("programbench", "data/templates"), autoescape=False)
+    manifest = env.get_template("submission.yaml.j2").render(
+        run_dir=run_dir, submission_id=run_name, programbench_version=version("programbench"), **carried
+    )
+    (run_dir / "submission.yaml").write_text(manifest + "\n")
+
+    # README is created once (a starting point for the author); never overwritten.
+    readme = run_dir / "README.md"
+    if not readme.exists():
+        readme_text = env.get_template("README.md.j2").render(
+            submission_id=run_name,
+            mean_pct=round(headline.mean_score * 100, 1),
+            resolved_pct=headline.resolved_pct,
+            n_attempted=headline.n_instances_attempted,
+            n_total=headline.n_instances_total,
+            **carried,
+        )
+        # The template has non-ASCII text (e.g. "→"), which a legacy locale encoding cannot write.
+        readme.write_text(readme_text, encoding="utf-8")
+
+    if api and pending:
+        _upload_artifacts(api, dataset, pending, existing, overwrite)
+
+    return PackageResult(run_dir, packaged, skipped, headline)
diff --git a/src/programbench/publish.py b/src/programbench/publish.py
new file mode 100644
index 0000000..81b37e3
--- /dev/null
+++ b/src/programbench/publish.py
@@ -0,0 +1,107 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Create a submission's public GitHub repo and push it.
+
+The middle step between ``package`` and ``register``: it turns a packaged run directory
+into a public Git repo and pushes it. The heavy artifacts already live on HuggingFace (as
+``.url`` + ``.sha256`` written by ``package``), so only light files are committed. With
+``gh`` the repo is created and pushed in one shot; without ``gh`` it commits locally and
+either pushes to a ``--remote`` you pre-created, or prints the steps to finish by hand.
+
+The repo URL is never stored in ``submission.yaml`` — the repo name defaults to the submission
+id and ``register`` reads the URL back from the git remote this sets, keeping the manifest host-agnostic.
+"""
+
+import shutil
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+
+def _git(cwd: Path, *args: str) -> str:  # stripped stdout; raises CalledProcessError on failure
+    return subprocess.run(("git", *args), cwd=cwd, text=True, capture_output=True, check=True).stdout.strip()
+
+
+def _to_https(url: str) -> str:
+    """Browsable https form of a git remote: ``git@host:owner/repo.git`` -> ``https://host/owner/repo``."""
+    trimmed = url.removesuffix(".git")
+    if not trimmed.startswith("git@"):
+        return trimmed  # not the scp-like form: only the .git suffix is dropped
+    host, path = trimmed.removeprefix("git@").split(":", 1)
+    return f"https://{host}/{path}"
+
+
+def _origin(run_dir: Path) -> str | None:
+    """URL of the ``origin`` remote, or None when run_dir has no .git or no remote of that name."""
+    has_origin = (run_dir / ".git").exists() and "origin" in _git(run_dir, "remote").split()
+    return _git(run_dir, "remote", "get-url", "origin") if has_origin else None
+
+
+@dataclass
+class PublishResult:
+    repo_url: str | None  # URL the submission was pushed to; None when nothing was pushed
+    committed: bool  # whether this call made a new commit (False if nothing was pending)
+    next_steps: str | None  # manual instructions when no push happened (no gh, no --remote)
+
+
+def _ensure_committed(run_dir: Path) -> bool:
+    """Stage everything in run_dir — running ``git init -b main`` first if there is no .git —
+    and commit it. Returns False when nothing was pending, True when a commit was made.
+    If ``git config user.email`` finds no identity (common in fresh CI containers, where
+    ``git commit`` would otherwise error out), a ProgramBench identity is supplied."""
+    if not (run_dir / ".git").exists():
+        _git(run_dir, "init", "-b", "main")
+    _git(run_dir, "add", "-A")
+    if _git(run_dir, "status", "--porcelain") == "":
+        return False
+    message = f"ProgramBench submission: {run_dir.resolve().name}"
+    probe = subprocess.run(["git", "config", "user.email"], cwd=run_dir, capture_output=True)
+    fallback = ("-c", "user.name=ProgramBench", "-c", "user.email=submissions@programbench.com")
+    _git(run_dir, *(fallback if probe.returncode else ()), "commit", "-m", message)
+    return True
+
+
+def _gh_repo_url(slug: str, private: bool) -> str:
+    """Look the repo up with ``gh repo view``; if that fails, ``gh repo create`` it (public
+    unless ``private``) and look it up again. Returns the URL that gh reports."""
+    lookup = ["gh", "repo", "view", slug, "--json", "url", "-q", ".url"]
+    captured = {"capture_output": True, "text": True}
+    missing = subprocess.run(lookup, **captured).returncode != 0
+    if missing:
+        visibility = "--private" if private else "--public"
+        subprocess.run(["gh", "repo", "create", slug, visibility], check=True, **captured)
+    found = subprocess.run(lookup, check=True, **captured)
+    return found.stdout.strip()
+
+
+def publish(run_dir: Path, owner: str = "", repo: str = "", private: bool = False, remote: str = "") -> PublishResult:
+    """Commit the submission in run_dir and push it to ``main`` on ``origin``. The target is
+    ``remote``, else the existing origin, else a repo found or created via ``gh``; with none
+    of these the commit stays local and manual steps are returned instead of a URL."""
+    name = repo or run_dir.resolve().name
+    committed = _ensure_committed(run_dir)
+
+    target = remote or _origin(run_dir)
+    if not target and not shutil.which("gh"):
+        steps = (
+            "`gh` is not installed and no --remote was given, so the repo could not be created. "
+            f"The submission is committed locally in {run_dir}. To finish:\n"
+            f"  1. Create an empty PUBLIC repo (named '{name}') at https://github.com/new\n"
+            "  2. From the submission directory, wire it up and push:\n"
+            "       git remote add origin <its-url>\n"
+            "       git push -u origin HEAD:main\n"
+            "Then run `programbench submit register .` to register it on the leaderboard."
+        )
+        return PublishResult(None, committed, steps)
+    if not target:
+        target = _gh_repo_url(f"{owner}/{name}" if owner else name, private)
+    # Push over HTTPS (gh's credentials): an SSH origin needs keys and fails where port 22 is blocked.
+    url = _to_https(target)
+    verb = "set-url" if _origin(run_dir) else "add"
+    _git(run_dir, "remote", verb, "origin", url)
+    _git(run_dir, "push", "-u", "origin", "HEAD:main")
+    return PublishResult(url, committed, None)
diff --git a/src/programbench/register.py b/src/programbench/register.py
new file mode 100644
index 0000000..7494211
--- /dev/null
+++ b/src/programbench/register.py
@@ -0,0 +1,219 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Register a packaged submission into the leaderboard registry by opening a PR.
+
+A registry entry is small and self-contained: a pointer to the submission's own public
+repo, plus the manifest and stat files copied out of it.
+
+    submissions/<id>/
+      pointer.yaml      # source repo URL + the exact commit that was scored
+      submission.yaml   # copied from the submission
+      _stats/*.json     # copied from the submission
+
+This builds that entry against a clone of the registry (default
+github.com/ProgramBench/submissions) and opens the PR. With ``gh`` it forks the registry
+and opens the PR for you; without it, it leaves the commit on a branch in a clone and
+prints the compare URL so you can open the PR by hand.
+"""
+
+import json
+import shutil
+import subprocess
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+import yaml
+
+REGISTRY_DEFAULT = "https://github.com/ProgramBench/submissions"  # the default registry repo entries are PR'd against
+
+
+def _git(cwd: Path, *args: str) -> str:  # run `git <args>` in cwd -> stripped stdout; raises CalledProcessError
+    return subprocess.run(["git", *args], cwd=cwd, check=True, capture_output=True, text=True).stdout.strip()
+
+
+def _commit(cwd: Path, message: str) -> None:
+    """Run ``git commit -m message`` in ``cwd``. When ``git config user.email`` exits non-zero
+    (no identity set, e.g. a fresh CI container) a ProgramBench name/email is passed via ``-c``."""
+    probe = subprocess.run(["git", "config", "user.email"], cwd=cwd, capture_output=True)
+    fallback = ["-c", "user.name=ProgramBench", "-c", "user.email=submissions@programbench.com"]
+    identity = fallback if probe.returncode != 0 else []
+    _git(cwd, *identity, "commit", "-m", message)
+
+
+def _to_https(url: str) -> str:
+    """A git remote (``git@host:owner/repo.git``, ``ssh://git@…`` or ``https://…``) as a browsable https URL."""
+    url = url.strip().rstrip("/").removesuffix(".git")  # tolerate a trailing newline or slash from user input
+    url = url.removeprefix("ssh://") if url.startswith("ssh://git@") else url
+    if url.startswith("git@"):
+        return "https://" + url[4:].replace(":", "/", 1)  # scp-style host:path -> host/path; no colon is fine
+    return url
+
+
+def _slug(registry: str) -> str:
+    """The ``Owner/Repo`` form ``gh`` expects: the https URL with its ``https://github.com/`` prefix removed."""
+    return _to_https(registry).removeprefix("https://github.com/")
+
+
+@dataclass
+class RegisterPlan:
+    submission_id: str  # name of the submission directory
+    source: str  # URL of the submission's own repo
+    commit: str  # commit recorded in pointer.yaml
+    registry: str  # registry repo the entry is destined for
+    branch: str  # branch the entry is committed on (``add-<submission_id>``)
+    pointer: str  # rendered pointer.yaml
+    files: list[str]  # entry-relative paths that will be added
+    title: str  # PR title, also used as the commit message
+    body: str  # PR description
+
+
+@dataclass
+class RegisterResult:
+    plan: RegisterPlan  # the plan the entry was built from
+    pr_url: str | None  # set when a PR was opened (gh path)
+    next_steps: str | None  # set when manual steps remain (no-gh path)
+
+
+def build_plan(
+    submission_dir: Path, registry: str, source: str | None = None, commit: str | None = None
+) -> RegisterPlan:
+    """Build the registry entry + PR text; ``source``/``commit`` default to the repo's ``origin`` URL and ``HEAD``."""
+    sub_id = submission_dir.resolve().name
+    manifest = yaml.safe_load((submission_dir / "submission.yaml").read_text())
+    if not source:  # git is only consulted when no override was given
+        source = _to_https(_git(submission_dir, "remote", "get-url", "origin"))
+    if not commit:
+        commit = _git(submission_dir, "rev-parse", "HEAD")
+    pointer = yaml.safe_dump({"submission_id": sub_id, "source": source, "commit": commit}, sort_keys=False)
+    stats_dir = submission_dir / "_stats"
+    files = ["pointer.yaml", "submission.yaml", *(f"_stats/{p.name}" for p in sorted(stats_dir.glob("*.json")))]
+    system = manifest["system"]
+    n_attempted = len(json.loads((stats_dir / "score.json").read_text()))
+    body = (
+        f"Registers **{system['model']}** ({system['provider']}) + {system['agent']}.\n\n"
+        f"- instances attempted: {n_attempted}\n\n"
+        f"Source: {source}\nCommit: `{commit}`\n\n"
+        "Tier-0 verified (`programbench submit verify .`). Leaderboard scores are recomputed from `_stats/score.json`."
+    )
+    return RegisterPlan(
+        sub_id, source, commit, registry, f"add-{sub_id}", pointer, files, f"Add submission: {sub_id}", body
+    )
+
+
+def write_entry(plan: RegisterPlan, submission_dir: Path, registry_root: Path) -> Path:
+    """Materialize ``submissions/<id>/`` under ``registry_root`` (overwriting any existing entry) and return it."""
+    entry = registry_root / "submissions" / plan.submission_id
+    if entry.exists():
+        shutil.rmtree(entry)  # start from an empty directory so files of a previous entry never linger
+    (entry / "_stats").mkdir(parents=True)  # parents=True also creates `entry` itself
+    (entry / "pointer.yaml").write_text(plan.pointer)
+    shutil.copyfile(submission_dir / "submission.yaml", entry / "submission.yaml")
+    for p in sorted((submission_dir / "_stats").glob("*.json")):
+        shutil.copyfile(p, entry / "_stats" / p.name)
+    return entry
+
+
+def register_submission(
+    submission_dir: Path, registry: str, source: str | None = None, commit: str | None = None
+) -> RegisterResult:
+    """Clone the registry, commit the entry on a branch, and open the PR.
+
+    With ``gh``: maintainers (push access) get a branch + PR straight on the registry;
+    everyone else forks first (and a fork is only possible if the registry allows it).
+    The scratch clone used on this path is removed whether or not the PR could be opened.
+    Without ``gh`` it leaves the commit on a branch in a kept clone and returns the manual
+    push + compare-URL steps in ``next_steps`` (so the clone must outlive this call).
+
+    The clone directory is named after the registry repo (last segment of its slug), because
+    that is where ``gh repo fork --clone`` puts its clone; a registry other than the default
+    therefore works on the fork path too.
+
+    ``source`` / ``commit`` are passed to ``build_plan`` and override the autodetected
+    submission repo URL and commit.
+
+    Returns a ``RegisterResult`` with ``pr_url`` set (gh path) or ``next_steps`` set (no gh).
+    Raises ``RuntimeError`` if no open PR exists for the branch after ``gh pr create``;
+    failed git/gh commands run with ``check=True`` raise ``subprocess.CalledProcessError``.
+    """
+    plan = build_plan(submission_dir, registry, source, commit)
+    slug = _slug(registry)
+    # `gh repo fork --clone` names its clone after the repository, so the directory is derived
+    # from the slug rather than hard-coded to the default registry's name.
+    repo_name = slug.rstrip("/").rsplit("/", 1)[-1]
+    clone = Path(tempfile.mkdtemp(prefix="programbench-register-")) / repo_name
+
+    if shutil.which("gh"):
+        try:
+            # Maintainers push a branch straight to the registry; others fork (only works if
+            # the registry permits forks — org/private repos often disable them).
+            perm = subprocess.run(
+                ["gh", "api", f"repos/{slug}", "--jq", ".permissions.push"], capture_output=True, text=True
+            )
+            if perm.stdout.strip() == "true":
+                _git(clone.parent, "clone", "--depth", "1", _to_https(registry), str(clone))
+                head = plan.branch
+            else:
+                # gh repo fork takes no destination arg, so it clones into <cwd>/<repo-name>;
+                # running from clone.parent makes that equal `clone`.
+                subprocess.run(
+                    ["gh", "repo", "fork", slug, "--clone", "--default-branch-only"],
+                    cwd=clone.parent,
+                    check=True,
+                    capture_output=True,
+                    text=True,
+                )
+                login = subprocess.run(
+                    ["gh", "api", "user", "--jq", ".login"], check=True, capture_output=True, text=True
+                ).stdout.strip()
+                head = f"{login}:{plan.branch}"
+            # Push over HTTPS: gh may wire an ssh remote, and ssh needs keys set up (and is
+            # blocked in some sandboxes), whereas gh's https credentials always work.
+            _git(clone, "remote", "set-url", "origin", _to_https(_git(clone, "remote", "get-url", "origin")))
+            _git(clone, "checkout", "-b", plan.branch)
+            write_entry(plan, submission_dir, clone)
+            _git(clone, "add", f"submissions/{plan.submission_id}")
+            _commit(clone, plan.title)
+            # Force so re-running register updates an existing PR (the add-<id> branch is ours).
+            _git(clone, "push", "-u", "--force", "origin", plan.branch)
+            # Open the PR (explicit --head; gh's inference is unreliable). The branch lookup is
+            # the source of truth: gh pr create can exit nonzero yet still create the PR, and a
+            # PR for the branch may already exist from a prior run.
+            create = ["gh", "pr", "create", "--repo", slug, "--head", head]
+            created = subprocess.run(
+                [*create, "--title", plan.title, "--body", plan.body], cwd=clone, capture_output=True, text=True
+            )
+            lookup = ["gh", "pr", "list", "--repo", slug, "--head", plan.branch, "--state", "open"]
+            pr_url = subprocess.run(
+                [*lookup, "--json", "url", "--jq", ".[0].url"],
+                cwd=clone,
+                check=True,
+                capture_output=True,
+                text=True,
+            ).stdout.strip()
+            if not pr_url:
+                raise RuntimeError(f"gh pr create did not open a PR:\n{created.stderr or created.stdout}")
+            return RegisterResult(plan, pr_url, None)
+        finally:
+            # The gh-path clone is scratch space: remove it on failure too, not just on success.
+            shutil.rmtree(clone.parent, ignore_errors=True)
+
+    # No gh: clone the registry directly, commit the branch, and hand back the steps.
+    _git(clone.parent, "clone", "--depth", "1", _to_https(registry), str(clone))
+    _git(clone, "checkout", "-b", plan.branch)
+    write_entry(plan, submission_dir, clone)
+    _git(clone, "add", f"submissions/{plan.submission_id}")
+    _commit(clone, plan.title)
+    steps = (
+        "`gh` not found, so the PR was not opened. The entry is committed on branch "
+        f"`{plan.branch}` in:\n  {clone}\n\n"
+        "To finish, from that clone push the branch to your fork of the registry and open a PR:\n"
+        f"  git remote add fork https://github.com/<you>/{repo_name}\n"
+        f"  git push -u fork {plan.branch}\n"
+        f"  {_to_https(registry)}/compare/main...<you>:{plan.branch}?expand=1"
+    )
+    return RegisterResult(plan, None, steps)
diff --git a/src/programbench/submission.py b/src/programbench/submission.py
new file mode 100644
index 0000000..396a9ec
--- /dev/null
+++ b/src/programbench/submission.py
@@ -0,0 +1,246 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Shared helpers for building (`package`) and checking (`verify`) submissions.
+
+Both commands must score a run directory the same way, so the scoring and headline
+aggregation live here and are imported by each command.
+"""
+
+import hashlib
+import json
+import shutil
+import subprocess
+import tarfile
+import tempfile
+import urllib.parse
+import urllib.request
+from dataclasses import asdict, dataclass
+from pathlib import Path
+
+import yaml
+
+from programbench.eval.eval import EvaluationResult
+from programbench.utils.load_data import get_active_branches, get_ignored_tests, load_all_instances
+
+RESOLVED_THRESHOLD = 1.0  # score at or above which an instance counts as resolved
+NEAR_RESOLVED_THRESHOLD = 0.95  # score at or above which an instance counts as near-resolved
+FIXTURE_PREFIX = "testorg__"  # instance ids starting with this are the bundled test fixture
+DOWNLOAD_TIMEOUT = 60  # seconds; fail fast rather than hang on a stalled connection
+
+
+def _checked_url(raw: str) -> str:
+    """Return ``raw`` stripped of surrounding whitespace, or raise ``ValueError`` unless its scheme is
+    http/https — submissions are untrusted, so e.g. ``file://`` must never reach ``urlopen``."""
+    url = raw.strip()
+    if urllib.parse.urlparse(url).scheme in {"http", "https"}:
+        return url
+    raise ValueError(f"refusing to fetch non-http(s) URL: {url!r}")
+
+
+def benchmark_instances() -> dict[str, dict]:
+    """All loaded instances keyed by ``instance_id``, minus those whose id starts with ``FIXTURE_PREFIX``."""
+    return {iid: inst for inst in load_all_instances() if not (iid := inst["instance_id"]).startswith(FIXTURE_PREFIX)}
+
+
+def sha256_file(path: Path) -> str:  # hex SHA-256 of the file, streamed in 1 MiB chunks
+    digest = hashlib.sha256()
+    with path.open("rb") as fh:
+        while block := fh.read(1 << 20):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def test_results_map(eval_json: Path, instance: dict) -> dict[str, bool]:
+    """Map ``"<branch>/<test_name>"`` to ``True`` iff that test passed, for one instance.
+
+    The eval output is first narrowed to the instance's active branches and stripped of its
+    ignored tests (the same filtering ``info`` applies). Scores are derived from this map, which
+    is what lets the leaderboard recompute them later with specific tests struck out.
+    """
+    parsed = EvaluationResult.model_validate_json(eval_json.read_text())
+    kept = parsed.for_branches(get_active_branches(instance)).without_ignored(get_ignored_tests(instance))
+    return {test.full_name: test.is_resolved for test in kept.test_results}
+
+
+def score_from_tests(tests: dict[str, bool], ignore: set[str] = frozenset()) -> float:
+    """Pass rate over the tests whose name is not in ``ignore``; 0.0 when no test is left to count."""
+    n_kept = len(tests.keys() - ignore)
+    return sum(ok for name, ok in tests.items() if name not in ignore) / n_kept if n_kept else 0.0
+
+
+def score_instance(eval_json: Path, instance: dict) -> float:
+    """Score one instance: the pass fraction of its filtered per-test map (same filtering as `info`)."""
+    return score_from_tests(test_results_map(eval_json, instance))
+
+
+def score_run(run_dir: Path, instances: dict[str, dict]) -> dict[str, float]:
+    """Score each sub-directory ``<iid>/`` of ``run_dir`` that holds ``<iid>.eval.json`` and whose
+    name is a key of ``instances``; returns ``{instance_id: score}`` in sorted directory order."""
+    subdirs = sorted(d for d in run_dir.iterdir() if d.is_dir())
+    candidates = ((d.name, d / f"{d.name}.eval.json") for d in subdirs)
+    return {
+        iid: score_instance(path, instances[iid]) for iid, path in candidates if path.exists() and iid in instances
+    }
+    # (an empty dict when nothing qualifies)
+
+
+def write_stat(run_dir: Path, stat: str, by_instance: dict[str, object]) -> None:
+    """Dump ``by_instance`` (``{iid: value}``) to ``<run_dir>/_stats/<stat>.json``, creating ``_stats`` if needed."""
+    (stats_dir := run_dir / "_stats").mkdir(exist_ok=True)
+    (stats_dir / f"{stat}.json").write_text(json.dumps(by_instance, indent=2, sort_keys=True))
+
+
+_HEAVY_EXTRA_KEYS = ("message", "text")  # per-test ``extra`` keys that are moved into the heavy eval.log.json
+
+
+def _full_name(t: dict) -> str:  # "<branch>/<name>", or just the name when the test has no (or an empty) branch
+    return f"{t['branch']}/{t['name']}" if t.get("branch") else t["name"]
+
+
+def split_eval_json(instance_dir: Path, iid: str) -> None:
+    """Split ``<iid>.eval.json`` into a light eval.json + a heavy ``<iid>.eval.log.json``.
+
+    The heavy file holds the only bulky parts — the top-level ``log`` and each test's
+    ``message``/``text`` extras — keyed by test name so the two recombine losslessly. The heavy
+    file is written before the light one, so an interrupted split never loses the bulky data.
+    """
+    p = instance_dir / f"{iid}.eval.json"
+    data = json.loads(p.read_text())
+    # Idempotent: if there's nothing heavy left (already split, or genuinely light), do
+    # nothing — never clobber an existing eval.log.json.
+    has_heavy = bool(data.get("log")) or any(
+        k in (t.get("extra") or {}) for t in data.get("test_results", []) for k in _HEAVY_EXTRA_KEYS
+    )
+    if not has_heavy:
+        return
+    heavy: dict = {"log": data.get("log") or [], "failures": {}}
+    for t in data.get("test_results", []):
+        extra = t.get("extra") or {}
+        moved = {k: extra.pop(k) for k in _HEAVY_EXTRA_KEYS if k in extra}
+        if moved:
+            heavy["failures"][_full_name(t)] = moved
+    data["log"] = []
+    # Heavy first: if the second write fails, eval.json is still whole and the split can be re-run.
+    (instance_dir / f"{iid}.eval.log.json").write_text(json.dumps(heavy))
+    p.write_text(json.dumps(data, indent=2))
+
+
+def recombine_eval_json(instance_dir: Path, iid: str) -> bool:
+    """Undo :func:`split_eval_json`: merge the heavy data back into ``<iid>.eval.json`` and delete
+    the heavy file together with its ``.url``/``.sha256`` sidecars.
+
+    The heavy data comes from the local ``<iid>.eval.log.json`` if present, otherwise from the URL
+    in ``<iid>.eval.log.json.url``; a download is verified against the ``.sha256`` sidecar when
+    there is one. Returns True if a recombine happened, and False when the light file is missing
+    or there is neither a local heavy file nor a ``.url`` to fetch it from.
+    """
+    base = f"{iid}.eval.log.json"
+    light = instance_dir / f"{iid}.eval.json"
+    log_file, url_file, sha_file = (instance_dir / f"{base}{ext}" for ext in ("", ".url", ".sha256"))
+    if not light.exists():
+        return False
+    if log_file.exists():
+        heavy = json.loads(log_file.read_text())
+    elif url_file.exists():
+        with urllib.request.urlopen(_checked_url(url_file.read_text()), timeout=DOWNLOAD_TIMEOUT) as r:  # noqa: S310
+            payload = r.read()
+        if sha_file.exists() and (got := hashlib.sha256(payload).hexdigest()) != sha_file.read_text().split()[0]:
+            raise ValueError(f"{iid}: eval.log.json sha256 mismatch on download (got {got[:12]}…)")
+        heavy = json.loads(payload)
+    else:
+        return False
+    merged = json.loads(light.read_text())
+    merged["log"] = heavy.get("log", [])
+    failures = heavy.get("failures", {})
+    for result in merged.get("test_results", []):
+        key = _full_name(result)
+        if key in failures:
+            result.setdefault("extra", {}).update(failures[key])
+    light.write_text(json.dumps(merged, indent=2))
+    for leftover in (log_file, url_file, sha_file):
+        leftover.unlink(missing_ok=True)
+    return True
+
+
+@dataclass
+class Headline:
+    mean_score: float  # as filled by aggregate(): mean over attempted instances, rounded to 4 places
+    resolved_pct: float  # % of all instances scoring >= RESOLVED_THRESHOLD
+    near_resolved_pct: float  # % of all instances scoring >= NEAR_RESOLVED_THRESHOLD
+    n_instances_attempted: int  # number of instances that have a score
+    n_instances_total: int  # denominator of the two percentages
+
+    def as_dict(self) -> dict:  # plain-dict form via dataclasses.asdict
+        return asdict(self)
+
+
+def aggregate(scores: dict[str, float], n_total: int) -> Headline:
+    """Headline stats: mean over attempted instances; resolved/near % over ``n_total`` (unattempted = unresolved)."""
+    if not scores:
+        raise ValueError("No scored instances found")
+    attempted = len(scores)
+    n_resolved = sum(s >= RESOLVED_THRESHOLD for s in scores.values())
+    n_near = sum(s >= NEAR_RESOLVED_THRESHOLD for s in scores.values())
+    return Headline(
+        mean_score=round(sum(scores.values()) / attempted, 4),
+        resolved_pct=round(100 * n_resolved / n_total, 1),
+        near_resolved_pct=round(100 * n_near / n_total, 1),
+        n_instances_attempted=attempted,
+        n_instances_total=n_total,
+    )
+
+
+def load_manifest(submission_dir: Path) -> dict:  # parsed <submission_dir>/submission.yaml (yaml.safe_load)
+    return yaml.safe_load((submission_dir / "submission.yaml").read_text())
+
+
+def resolve_submission_tar(instance_dir: Path, dest_tar: Path) -> None:
+    """Materialize an instance's submission.tar.gz into ``dest_tar``, verifying sha256.
+
+    Supports three artifact forms: inline file, ``.url`` (downloaded), or
+    ``submission.ref.yaml`` (git checkout packed). The sha256 sidecar, when present, is
+    enforced for inline/url; for git it is advisory (packing is not byte-reproducible).
+    """
+    sha_file = instance_dir / "submission.tar.gz.sha256"
+    expected = sha_file.read_text().split()[0] if sha_file.exists() else None  # sidecar's first token
+    # The three forms are tried in this order; the first one present wins.
+    inline = instance_dir / "submission.tar.gz"
+    url_file = instance_dir / "submission.tar.gz.url"
+    ref_file = instance_dir / "submission.ref.yaml"
+    if inline.exists():
+        shutil.copy2(inline, dest_tar)
+    elif url_file.exists():
+        with (
+            urllib.request.urlopen(_checked_url(url_file.read_text()), timeout=DOWNLOAD_TIMEOUT) as r,  # noqa: S310
+            dest_tar.open("wb") as out,
+        ):
+            shutil.copyfileobj(r, out)  # streamed to disk rather than read into memory
+    elif ref_file.exists():
+        _pack_git_ref(yaml.safe_load(ref_file.read_text()), dest_tar)
+        expected = None  # git packing is not byte-reproducible; rely on re-eval instead
+    else:
+        raise ValueError(f"{instance_dir.name}: no submission.tar.gz, .url, or .ref.yaml found")
+    # Without a sidecar (or for a git ref) `expected` is None and nothing is checked.
+    if expected and (got := sha256_file(dest_tar)) != expected:
+        raise ValueError(f"{instance_dir.name}: sha256 mismatch (expected {expected[:12]}…, got {got[:12]}…)")
+
+
+def _pack_git_ref(ref: dict, dest_tar: Path) -> None:
+    """Shallow-clone ``ref["repo"]`` at ``ref["ref"]`` and tar it (minus ``.git``) into ``dest_tar``."""
+    with tempfile.TemporaryDirectory() as tmp:
+        src = Path(tmp) / "src"
+        # ref is untrusted: "--" keeps a repo value like "--upload-pack=…" from being parsed as an option.
+        cmd = ["git", "clone", "--depth", "1", "--branch", ref["ref"], "--", ref["repo"], str(src)]
+        subprocess.run(cmd, check=True, capture_output=True)
+        root = src / ref["subpath"] if ref.get("subpath") else src
+        if not root.resolve().is_relative_to(src.resolve()):  # reject "..", absolute paths, outward symlinks
+            raise ValueError(f"subpath escapes the cloned repo: {ref['subpath']!r}")
+        with tarfile.open(dest_tar, "w:gz") as tar:
+            for p in sorted(root.rglob("*")):
+                rel = p.relative_to(root).as_posix()
+                if rel.split("/", 1)[0] != ".git":
+                    tar.add(p, arcname=rel, recursive=False)
diff --git a/src/programbench/verify.py b/src/programbench/verify.py
new file mode 100644
index 0000000..edb1334
--- /dev/null
+++ b/src/programbench/verify.py
@@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Verify a packaged submission against its own artifacts.
+
+Tier 0 (default, no Docker): recompute each instance's per-test pass/fail from its own
+eval.json and check it matches the submitted _stats/score.json — i.e. the reported scores
+faithfully reflect the eval output. A free check a third party or CI can run with only
+``programbench`` installed. (Leaderboard scores aren't stored in the submission, so there
+is no headline to check against.)
+
+Tier 1 (--tier1, Docker): resolve each submission.tar.gz, re-run ``programbench eval``,
+and confirm the freshly produced scores match the submitted eval.json. This is what
+proves the artifacts actually yield the reported results.
+"""
+
+import json
+import re
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+from programbench.submission import (
+    benchmark_instances,
+    resolve_submission_tar,
+    score_run,
+    test_results_map,
+)
+
+TOLERANCE = 1e-6  # absolute tolerance for comparing submitted vs re-run scores; absorbs float noise only.
+
+
+@dataclass
+class Check:
+    name: str  # what was checked (the verify functions pass the instance id)
+    claimed: object  # value taken from the submission
+    computed: object  # value recomputed by the verifier
+    ok: bool  # whether the check passed
+
+
+@dataclass
+class VerifyResult:
+    tier: int  # 0 or 1, matching the verify function that produced it
+    checks: list[Check]  # the individual checks that were run
+
+    @property
+    def ok(self) -> bool:  # True only if every check passed (vacuously True with no checks)
+        return all(c.ok for c in self.checks)
+
+
+def _close(a: object, b: object) -> bool:
+    """True when both values are numbers within ``TOLERANCE`` of each other. Anything non-numeric
+    (e.g. a missing score passed as ``None``) is a failed comparison rather than a crash."""
+    numeric = isinstance(a, (int, float)) and isinstance(b, (int, float))
+    return numeric and abs(a - b) <= TOLERANCE
+
+
+def verify_tier0(submission_dir: Path) -> VerifyResult:
+    """Check every entry of ``_stats/score.json`` against the instance's own eval.json.
+
+    An entry passes only if it names a benchmark instance, its eval.json exists, and the per-test
+    pass/fail map recomputed from that eval.json equals the stored one. Returns a tier-0 result."""
+    instances = benchmark_instances()
+    stored = json.loads((submission_dir / "_stats" / "score.json").read_text())
+    checks = []
+    for iid, stored_map in sorted(stored.items()):
+        eval_json = submission_dir / iid / f"{iid}.eval.json"
+        if iid not in instances:
+            checks.append(Check(iid, "in score.json", "not a benchmark instance", False))
+            continue
+        # Claimed tally as stored in score.json; shown whether or not the eval.json is there.
+        claimed = f"{sum(stored_map.values())}/{len(stored_map)} pass"
+        if not eval_json.exists():
+            checks.append(Check(iid, claimed, "no eval.json", False))
+            continue
+        recomputed = test_results_map(eval_json, instances[iid])
+        computed = f"{sum(recomputed.values())}/{len(recomputed)} pass"
+        # Equality is on the full per-test map, not just the tallies shown above.
+        checks.append(Check(iid, claimed, computed, recomputed == stored_map))
+    return VerifyResult(0, checks)
+
+
+def verify_tier1(submission_dir: Path, *, workers: int = 1, filter_spec: str = "") -> VerifyResult:
+    """Re-evaluate the targeted instances and compare fresh scores with the submitted ones.
+
+    Each target's submission.tar.gz is resolved into a scratch run directory, the eval batch is
+    run over it, and every target yields one check (failed if no fresh score was produced)."""
+    from programbench.eval.eval_batch import run_eval_batch
+
+    instances = benchmark_instances()
+    submitted = score_run(submission_dir, instances)
+
+    # Same regex semantics as the re-eval filter (instance_filters.filter_instances): only
+    # resolve/download and re-eval the targeted instances, not every submitted tarball.
+    targets = [iid for iid in submitted if not filter_spec or re.match(filter_spec, iid)]
+
+    with tempfile.TemporaryDirectory() as tmp:
+        run = Path(tmp)
+        for iid in targets:
+            staged = run / iid
+            staged.mkdir(parents=True)
+            resolve_submission_tar(submission_dir / iid, staged / "submission.tar.gz")
+        run_eval_batch(sources=[run], workers=workers, filter_spec=filter_spec, force=True)
+        fresh = score_run(run, instances)
+
+    # A targeted instance that produced no fresh score is reported as a failure (NaN), not
+    # silently skipped.
+    checks = []
+    for iid in targets:
+        rerun = fresh.get(iid)
+        shown = float("nan") if rerun is None else round(rerun, 4)
+        checks.append(Check(iid, round(submitted[iid], 4), shown, _close(submitted[iid], rerun)))
+    return VerifyResult(1, checks)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 594bd9d..984085d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -37,3 +37,27 @@ def test_blob_sync_help():
     result = runner.invoke(app, ["blob", "sync", "--help"])
     assert result.exit_code == 0
     assert "instance" in result.output.lower()
+
+
+def test_submit_help():
+    result = runner.invoke(app, ["submit", "--help"])
+    assert result.exit_code == 0
+    assert not [cmd for cmd in ("package", "publish", "verify", "register", "recombine") if cmd not in result.output]
+
+
+def test_submit_package_help():  # `submit package --help` exits 0 and mentions "upload"
+    result = runner.invoke(app, ["submit", "package", "--help"])
+    assert result.exit_code == 0
+    assert "upload" in result.output.lower()
+
+
+def test_submit_register_help():  # `submit register --help` exits 0 and mentions "registry"
+    result = runner.invoke(app, ["submit", "register", "--help"])
+    assert result.exit_code == 0
+    assert "registry" in result.output.lower()
+
+
+def test_submit_publish_help():  # `submit publish --help` exits 0 and mentions "owner"
+    result = runner.invoke(app, ["submit", "publish", "--help"])
+    assert result.exit_code == 0
+    assert "owner" in result.output.lower()
diff --git a/tests/test_submission.py b/tests/test_submission.py
new file mode 100644
index 0000000..9d1d5d4
--- /dev/null
+++ b/tests/test_submission.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Tests for submission helpers that don't need the benchmark data."""
+
+import json
+
+from programbench.submission import recombine_eval_json, split_eval_json
+
+
+def test_split_recombine_roundtrip_is_lossless(tmp_path):
+    iid = "org__tool.abc1234"
+    d = tmp_path / iid
+    d.mkdir()
+    original = {
+        "instance_id": iid,
+        "log": [{"step": 1, "out": "x" * 200}, {"step": 2, "out": "y"}],
+        "test_results": [
+            {"branch": "main", "name": "t_pass", "is_resolved": True, "extra": {"duration": 0.5}},
+            {
+                "branch": "main",
+                "name": "t_fail",
+                "is_resolved": False,
+                "extra": {"message": "assertion failed", "text": "trace " * 50, "duration": 1.2},
+            },
+            {"branch": "feat", "name": "t_other", "is_resolved": False, "extra": {"text": "boom"}},
+        ],
+    }
+    eval_json = d / f"{iid}.eval.json"
+    eval_json.write_text(json.dumps(original, indent=2))
+    # Split: the log and the heavy extras leave eval.json and land in the eval.log.json file.
+    split_eval_json(d, iid)
+    light = json.loads(eval_json.read_text())
+    assert light["log"] == []
+    assert "message" not in light["test_results"][1]["extra"]
+    assert (d / f"{iid}.eval.log.json").exists()
+    # Recombine: eval.json is back to the original content and the heavy file is gone.
+    assert recombine_eval_json(d, iid) is True
+    assert json.loads(eval_json.read_text()) == original
+    assert not (d / f"{iid}.eval.log.json").exists()
+
+
+def test_split_is_idempotent_and_noop_when_light(tmp_path):  # exercises the already-light (no-op) case
+    iid = "org__tool.def5678"
+    d = tmp_path / iid
+    d.mkdir()
+    light = {"instance_id": iid, "log": [], "test_results": [{"branch": "main", "name": "t", "is_resolved": True}]}
+    (d / f"{iid}.eval.json").write_text(json.dumps(light))
+    split_eval_json(d, iid)
+    assert not (d / f"{iid}.eval.log.json").exists()  # nothing heavy -> no split file written