From 5611cb2976da64de9a5d5e13871575535156d603 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 22:30:55 +0000 Subject: [PATCH 1/7] feat(ci,ml): Tier-3 live-prover provisioning + uncap Flux training .github/workflows/live-provers.yml Replace Tier-3 placeholder with a per-backend matrix. Live-provisioned (apt / upstream tarball): tamarin, proverif, metamath, twelf, ortools. Deferred-to-Containerfile with explicit skip note: hol4, acl2, scip, imandra. Each job probes the binary and runs the single matching case from tests/live_prover_suite.rs with continue-on-error while Wave-3 wires up. tests/live_prover_suite.rs Add #[tokio::test] entries for all nine Tier-3 backends and extend kind_label with their display names. Tests SKIP when the binary is absent on PATH (existing `which` probe), so running locally or in PR CI costs nothing; only the weekly tier3 matrix exercises them. src/julia/run_training.jl Drop the hardcoded max_proof_states=50_000 cap that made the expanded corpus pointless. Default 200k on GPU / 50k on CPU, overridable via ECHIDNA_MAX_PROOF_STATES (0 = unlimited). ECHIDNA_NUM_EPOCHS and ECHIDNA_NUM_NEGATIVES gain the same treatment. Required before retraining on the 2026-04 corpus expansion. --- .github/workflows/live-provers.yml | 118 +++++++++++++++++++++++++++-- src/julia/run_training.jl | 29 ++++++- tests/live_prover_suite.rs | 71 +++++++++++++++++ 3 files changed, 209 insertions(+), 9 deletions(-) diff --git a/.github/workflows/live-provers.yml b/.github/workflows/live-provers.yml index e17aaef..06f4b27 100644 --- a/.github/workflows/live-provers.yml +++ b/.github/workflows/live-provers.yml @@ -226,21 +226,127 @@ jobs: run: cargo test --test live_prover_suite --features live-provers -- --nocapture ${{ matrix.backend }} # ============================================================================ - # Tier 3 — weekly. Container-provisioned provers. Wave-3 placeholder. + # Tier 3 — weekly. Heavier upstream provers, best-effort provisioning. + # + # Backends grouped as: + # A — live-provisioned today via apt / upstream tarball (tamarin, proverif, + # metamath, twelf, ortools). Version-check runs, skip-if-absent. + # B — heavy-build deferred (hol4, acl2, scip) and proprietary (imandra). + # Provisioning step emits a skip note; test step runs the suite with + # continue-on-error so the matrix stays green and the binary's absence + # is logged as SKIP by the suite's `which` probe. # ============================================================================ tier3: - name: T3 container provers + name: T3 / ${{ matrix.backend }} if: github.event_name == 'schedule' && github.event.schedule == '0 5 * * 0' || (github.event_name == 'workflow_dispatch' && (inputs.tier == '3' || inputs.tier == 'all')) runs-on: ubuntu-latest continue-on-error: true + strategy: + fail-fast: false + matrix: + backend: + - tamarin + - proverif + - metamath + - twelf + - ortools + - hol4 + - acl2 + - scip + - imandra steps: - name: Checkout uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Announce Wave-3 TODO + - name: Install Rust + uses: dtolnay/rust-toolchain@4be9e76fd7c4901c61fb841f559994984270fce7 # stable + - name: Cache Cargo + uses: Swatinem/rust-cache@779680da715d629ac1d338a641029a2f4372abb5 # v2 + - name: Provision ${{ matrix.backend }} (best-effort) + continue-on-error: true run: | - echo "Tier-3 backends (Tamarin, ProVerif, Imandra, SCIP, OR-Tools, HOL4, ACL2, Twelf, Metamath)" - echo "are provisioned via per-backend Containerfiles in Wave-3 of L3." 
- echo "See ~/Desktop/ECHIDNA-L3-LIVE-PROVER-CI-PROMPT.md for scope." + set -euo pipefail + sudo apt-get update + case "${{ matrix.backend }}" in + tamarin) + # Tamarin publishes prebuilt Linux x86_64 tarballs. + curl -fsSL -o /tmp/tamarin.tar.gz \ + https://github.com/tamarin-prover/tamarin-prover/releases/latest/download/tamarin-prover-linux64-ubuntu.tar.gz || \ + curl -fsSL -o /tmp/tamarin.tar.gz \ + https://github.com/tamarin-prover/tamarin-prover/releases/download/1.10.0/tamarin-prover-1.10.0-linux64-ubuntu.tar.gz + sudo mkdir -p /opt/tamarin + sudo tar xzf /tmp/tamarin.tar.gz -C /opt/tamarin + # The tarball lays down tamarin-prover directly or in a version dir; link whichever we find. + TAMARIN_BIN="$(find /opt/tamarin -type f -name tamarin-prover | head -n 1)" + [ -n "$TAMARIN_BIN" ] && sudo ln -sf "$TAMARIN_BIN" /usr/local/bin/tamarin-prover + ;; + proverif) + # ProVerif is in the Ubuntu/Debian main repos. + sudo apt-get install -y proverif + ;; + metamath) + # metamath-exe is small; apt-installable on Ubuntu with build fallback. + sudo apt-get install -y metamath || { + sudo apt-get install -y build-essential git autoconf automake + git clone --depth=1 https://github.com/metamath/metamath-exe.git /tmp/mm + (cd /tmp/mm && ./build.sh) || \ + (cd /tmp/mm && autoreconf -i && ./configure && make) + sudo install -m 0755 /tmp/mm/metamath /usr/local/bin/metamath + } + ;; + twelf) + # Twelf is in Debian with SML/NJ; on Ubuntu it may live in universe. + sudo apt-get install -y twelf || \ + echo "twelf unavailable via apt on this runner; test will SKIP." + ;; + ortools) + # Google OR-Tools ships prebuilt Linux tarballs. The binary we + # expose to echidna is a small wrapper that invokes the solver CLI. + OR_URL="https://github.com/google/or-tools/releases/latest/download/or-tools_amd64_ubuntu-22.04_cpp_v9.11.4210.tar.gz" + curl -fsSL -o /tmp/ortools.tar.gz "$OR_URL" || { + echo "OR-Tools tarball unavailable at expected URL; test will SKIP." + exit 0 + } + sudo mkdir -p /opt/ortools + sudo tar xzf /tmp/ortools.tar.gz -C /opt/ortools --strip-components=1 + # Echidna's ORTools backend invokes `ortools_solve`. The release + # ships `bin/solve` or similar; create the expected symlink. + OR_BIN="$(find /opt/ortools -type f \( -name ortools_solve -o -name solve \) | head -n 1)" + [ -n "$OR_BIN" ] && sudo ln -sf "$OR_BIN" /usr/local/bin/ortools_solve + ;; + hol4) + # HOL4 requires Poly/ML and a full tree build (~15min+); defer to + # container provisioning. Test step will SKIP on this runner. + echo "hol4: heavy Poly/ML build deferred to Containerfile provisioning." + ;; + acl2) + # ACL2 requires a Common Lisp image (CCL/SBCL) and a 10min+ build; + # defer to container provisioning. Test step will SKIP. + echo "acl2: heavy SBCL+image build deferred to Containerfile." + ;; + scip) + # SCIP Optimization Suite needs a full cmake build (~10min). The + # upstream tarball requires a form-gated download, so CI defers to + # Containerfile provisioning. Test step will SKIP. + echo "scip: form-gated upstream tarball; deferred to Containerfile." + ;; + imandra) + # Imandra is proprietary; no public Linux binary. Handled via + # vendor-supplied container where a licence is available. + echo "imandra: proprietary; no public CI provisioning available." + ;; + esac + # Best-effort version probe for the matrix log. 
+ case "${{ matrix.backend }}" in + tamarin) tamarin-prover --version 2>&1 | head -n 1 || true ;; + proverif) proverif --version 2>&1 | head -n 1 || true ;; + metamath) echo exit | metamath 2>&1 | head -n 1 || true ;; + twelf) twelf-server --help 2>&1 | head -n 1 || true ;; + ortools) ortools_solve --help 2>&1 | head -n 1 || true ;; + hol4|acl2|scip|imandra) true ;; + esac + - name: Run live test for ${{ matrix.backend }} (allow-fail while Wave-3 wires up) + continue-on-error: true + run: cargo test --test live_prover_suite --features live-provers -- --nocapture ${{ matrix.backend }} # ============================================================================ # Tier 4 — quarterly, best-effort / allow-fail. diff --git a/src/julia/run_training.jl b/src/julia/run_training.jl index 7c38652..93c8f54 100644 --- a/src/julia/run_training.jl +++ b/src/julia/run_training.jl @@ -10,6 +10,14 @@ # data_dir = training_data/ # save_dir = models/neural/ # +# Environment overrides (see below): +# ECHIDNA_MAX_PROOF_STATES — cap on proof states loaded (default 200000 on +# GPU, 50000 on CPU). Set to 0 to disable the cap and consume the entire +# expanded corpus; required when re-baselining after corpus growth. +# ECHIDNA_NUM_EPOCHS — training epochs (default 30). +# ECHIDNA_NUM_NEGATIVES — hard-negative premise samples per example +# (default 20). +# # This script: # 1. Loads JSONL training data (proof states + premises) # 2. Builds vocabulary from the corpus @@ -74,10 +82,24 @@ println("═══════════════════════ println("Loading training data...") println("═══════════════════════════════════════════════════════════") +# Default cap: 200k on GPU (enough to exercise the expanded corpus without +# OOM on a 24GB card), 50k on CPU (keeps wall-clock finite). An operator +# re-baselining after corpus growth sets ECHIDNA_MAX_PROOF_STATES=0 to lift +# the cap entirely. +default_cap = has_gpu ? 200_000 : 50_000 +cap_env = get(ENV, "ECHIDNA_MAX_PROOF_STATES", "") +max_proof_states = isempty(cap_env) ? default_cap : parse(Int, cap_env) +# `load_training_data` treats any value `<= 0` as "load everything". +cap_label = max_proof_states <= 0 ? 
"unlimited" : string(max_proof_states) +println(" max_proof_states = $cap_label") + +num_negatives = parse(Int, get(ENV, "ECHIDNA_NUM_NEGATIVES", "20")) +println(" num_negatives = $num_negatives") + train_data, val_data, vocab = load_training_data(data_dir; train_split=0.8f0, - max_proof_states=50000, # Cap for reasonable training time - num_negatives=20 + max_proof_states=max_proof_states, + num_negatives=num_negatives, ) if isempty(train_data.examples) @@ -101,8 +123,9 @@ println("Model created successfully") println() # Configure training +num_epochs = parse(Int, get(ENV, "ECHIDNA_NUM_EPOCHS", "30")) training_config = TrainingConfig( - num_epochs=30, + num_epochs=num_epochs, learning_rate=1f-4, lr_schedule=:cosine, weight_decay=1f-5, diff --git a/tests/live_prover_suite.rs b/tests/live_prover_suite.rs index b613802..3ab4cf8 100644 --- a/tests/live_prover_suite.rs +++ b/tests/live_prover_suite.rs @@ -110,6 +110,15 @@ fn kind_label(kind: ProverKind) -> &'static str { ProverKind::Dafny => "Dafny", ProverKind::FStar => "F*", ProverKind::TLAPS => "TLAPS", + ProverKind::Tamarin => "Tamarin", + ProverKind::ProVerif => "ProVerif", + ProverKind::Metamath => "Metamath", + ProverKind::Twelf => "Twelf", + ProverKind::ORTools => "OR-Tools", + ProverKind::HOL4 => "HOL4", + ProverKind::ACL2 => "ACL2", + ProverKind::SCIP => "SCIP", + ProverKind::Imandra => "Imandra", _ => "", } } @@ -214,3 +223,65 @@ async fn live_tlaps_version() { // TLA+ Proof System's prover is `tlapm` (per provers/mod.rs). assert_version_reachable(ProverKind::TLAPS, "tlapm").await; } + +// ========================================================================== +// Tier 3 — weekly. Upstream-tarball or heavier-build provers. Most of +// these SKIP locally and in PR CI; the weekly tier3 matrix in +// live-provers.yml provisions each binary in its own job before running +// `cargo test ... `, so only the matching test runs per job. +// ========================================================================== + +#[tokio::test] +async fn live_tamarin_version() { + assert_version_reachable(ProverKind::Tamarin, "tamarin-prover").await; +} + +#[tokio::test] +async fn live_proverif_version() { + assert_version_reachable(ProverKind::ProVerif, "proverif").await; +} + +#[tokio::test] +async fn live_metamath_version() { + assert_version_reachable(ProverKind::Metamath, "metamath").await; +} + +#[tokio::test] +async fn live_twelf_version() { + // Twelf's CLI entry is `twelf-server` per provers/mod.rs. + assert_version_reachable(ProverKind::Twelf, "twelf-server").await; +} + +#[tokio::test] +async fn live_ortools_version() { + // Echidna's ORTools backend invokes `ortools_solve` (wrapper around the + // OR-Tools C++ solve CLI). Provisioned via upstream tarball. + assert_version_reachable(ProverKind::ORTools, "ortools_solve").await; +} + +#[tokio::test] +async fn live_hol4_version() { + // HOL4 requires Poly/ML + a tree build; provisioning is deferred to + // Containerfile. Test SKIPs on runners without `hol` on PATH. + assert_version_reachable(ProverKind::HOL4, "hol").await; +} + +#[tokio::test] +async fn live_acl2_version() { + // ACL2 requires a Common Lisp image; provisioning deferred to Containerfile. + assert_version_reachable(ProverKind::ACL2, "acl2").await; +} + +#[tokio::test] +async fn live_scip_version() { + // SCIP requires a cmake build of SCIP Optimization Suite; deferred to + // Containerfile. Test SKIPs until provisioned. 
+ assert_version_reachable(ProverKind::SCIP, "scip").await; +} + +#[tokio::test] +async fn live_imandra_version() { + // Imandra is proprietary; handled via vendor-supplied container where a + // licence is available. Test SKIPs on public CI. + assert_version_reachable(ProverKind::Imandra, "imandra").await; +} From 9df340cb77452b24496c941dd9b780af85465293 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 19 Apr 2026 22:34:40 +0000 Subject: [PATCH 2/7] feat(corpus): non-interactive upstream provisioning + extractor driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scripts/provision_corpora.sh Single source of truth for every prover's upstream corpus mirror. A catalogue entry per source (52 total: 45 git clones, 7 manual/gated) binds the external_corpora// path the extractors expect to a concrete clone URL, replacing the comments-scattered-across-54-files pattern. Idempotent, non-interactive, retries once on network failure, continues past a dead mirror so one 404 doesn't abort a 40-source run. --list, --status, --force, --all, and named-subset selection. Meant for a nightly cron or a Justfile recipe. scripts/extract_all.sh Composable companion. Runs every scripts/extract_*.jl (or a named subset) under the src/julia Julia project and collects failures into a non-zero exit code. Short-circuits gracefully when an extractor's upstream is absent (the extractors themselves already handle that). Together with provision_corpora.sh + run_training.jl this gives a three-step end-to-end retrain recipe: provision → extract → train. Motivation: the existing expand_training_data.sh was interactive (read -p) and only knew about three sources, so no CI job could grow the corpus without human input. Closing that gap unblocks the 100k per-prover target. --- scripts/extract_all.sh | 89 +++++++++++ scripts/provision_corpora.sh | 281 +++++++++++++++++++++++++++++++++++ 2 files changed, 370 insertions(+) create mode 100755 scripts/extract_all.sh create mode 100755 scripts/provision_corpora.sh diff --git a/scripts/extract_all.sh b/scripts/extract_all.sh new file mode 100755 index 0000000..43ef598 --- /dev/null +++ b/scripts/extract_all.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: PMPL-1.0-or-later +# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell +# +# ECHIDNA — run every scripts/extract_*.jl once, non-interactively. +# +# Assumes scripts/provision_corpora.sh has already populated +# external_corpora/; extractors whose upstream is absent short-circuit +# with a print line (they already handle missing inputs gracefully). +# +# Usage +# ----- +# scripts/extract_all.sh # run every extractor +# scripts/extract_all.sh NAME ... # run only the named extractors +# # (matches scripts/extract_.jl) +# scripts/extract_all.sh --list # list available extractors +# +# Environment +# ECHIDNA_JULIA_PROJECT — path passed to `julia --project=` +# (default: src/julia). +# +# Exit code is the number of extractors that failed (0 on clean run, +# N on partial failure). Suitable for a nightly cron. + +set -uo pipefail # NOTE: not `set -e` — we want to collect failures, not abort. + +ECHIDNA_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +JULIA_PROJECT="${ECHIDNA_JULIA_PROJECT:-$ECHIDNA_ROOT/src/julia}" + +list_extractors() { + ( cd "$ECHIDNA_ROOT/scripts" && ls extract_*.jl 2>/dev/null | sed 's/^extract_//; s/\.jl$//' ) +} + +# Handle info flags before probing the environment. 
+case "${1:-}" in + --list) + list_extractors + exit 0 + ;; + -h|--help) + sed -n '4,22p' "${BASH_SOURCE[0]}" | sed 's/^# \{0,1\}//' + exit 0 + ;; +esac + +if ! command -v julia >/dev/null 2>&1; then + echo "error: julia not on PATH; install Julia to run the extractors." >&2 + exit 127 +fi + +# Build the list of extractors to run. +if [[ $# -gt 0 ]]; then + targets=() + for name in "$@"; do + path="$ECHIDNA_ROOT/scripts/extract_${name}.jl" + if [[ ! -f "$path" ]]; then + echo "error: no such extractor: extract_${name}.jl" >&2 + exit 2 + fi + targets+=("$path") + done +else + mapfile -t targets < <(find "$ECHIDNA_ROOT/scripts" -maxdepth 1 -name 'extract_*.jl' | sort) +fi + +echo "Running ${#targets[@]} extractor(s) with --project=$JULIA_PROJECT" +echo + +failed=() +for script in "${targets[@]}"; do + name="$(basename "$script" .jl)" + echo "========================================" + echo " $name" + echo "========================================" + if julia --project="$JULIA_PROJECT" "$script"; then + echo " [ok] $name" + else + echo " [FAIL] $name" + failed+=("$name") + fi + echo +done + +if [[ ${#failed[@]} -gt 0 ]]; then + echo "Extractors that failed: ${failed[*]}" + exit "${#failed[@]}" +fi + +echo "All extractors completed." diff --git a/scripts/provision_corpora.sh b/scripts/provision_corpora.sh new file mode 100755 index 0000000..4f3f1bc --- /dev/null +++ b/scripts/provision_corpora.sh @@ -0,0 +1,281 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: PMPL-1.0-or-later +# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell +# +# ECHIDNA — Non-interactive upstream corpus provisioning. +# +# Clones the upstream mirror of every theorem-prover corpus the extractors +# under scripts/extract_*.jl expect at external_corpora//. Each +# clone uses --depth=1 to keep the bandwidth bill finite (~4-8 GB total +# when all sources are selected). +# +# Usage +# ----- +# scripts/provision_corpora.sh [NAME ...] # provision the named sources +# scripts/provision_corpora.sh --all # provision every source +# scripts/provision_corpora.sh --list # print the source catalogue +# scripts/provision_corpora.sh --status # show which sources are on disk +# +# Selection names match the external_corpora// directory each extractor +# expects. Unknown names exit non-zero so typos fail loudly in CI. +# +# Behaviour +# * If external_corpora// already exists the source is skipped +# (idempotent; safe to re-run). Pass --force to re-clone into a +# fresh directory. +# * Network failures are retried once; a failure after that is logged +# and the script continues to the next source (so a single offline +# mirror doesn't abort a 40-source provisioning). +# * Designed to be callable from a Justfile recipe or a nightly CI +# workflow. No interactive prompts, ever. + +set -euo pipefail + +ECHIDNA_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +CORPORA_DIR="${ECHIDNA_CORPORA_DIR:-$ECHIDNA_ROOT/external_corpora}" +FORCE=0 + +# ------------------------------------------------------------------ +# Upstream catalogue +# +# Each entry: "|||" +# kind = git — `git clone --depth=1 /` +# kind = tar — `curl | tar xz` into / +# kind = skip — metadata only (proprietary / form-gated / non-public) +# +# Adding a source: +# 1. Add a row here with the real upstream URL. +# 2. Update scripts/extract_.jl to read from external_corpora/. 
+# ------------------------------------------------------------------ +CATALOGUE=( + # Interactive proof assistants + "abella|git|https://github.com/abella-prover/abella" + "agda-stdlib|git|https://github.com/agda/agda-stdlib" + "agda-cubical|git|https://github.com/agda/cubical" + "agda-unimath|git|https://github.com/UniMath/agda-unimath" + "arend|git|https://github.com/JetBrains/Arend" + "CoqGym|git|https://github.com/princeton-vl/CoqGym" + "mathcomp|git|https://github.com/math-comp/math-comp" + "mathcomp-analysis|git|https://github.com/math-comp/analysis" + "mathcomp-finmap|git|https://github.com/math-comp/finmap" + "mathcomp-algebra-tactics|git|https://github.com/math-comp/algebra-tactics" + "mathlib3|git|https://github.com/leanprover-community/mathlib" + "mathlib4|git|https://github.com/leanprover-community/mathlib4" + "hol4|git|https://github.com/HOL-Theorem-Prover/HOL" + "hol_light|git|https://github.com/jrh13/hol-light" + "fstar|git|https://github.com/FStarLang/FStar" + "idris2|git|https://github.com/idris-lang/Idris2" + "matita|git|https://github.com/rocq-prover/matita" + "mizar|git|https://github.com/MizarProject/mml" + "metamath|git|https://github.com/metamath/set.mm" + "opentheory|git|https://github.com/gilith/opentheory" + "pvs|git|https://github.com/nasa/pvslib" + "afp|git|https://foss.heptapod.net/isa-afp/afp-devel.git" + + # ACL2 family + "acl2|git|https://github.com/acl2/acl2" + "acl2s|git|https://github.com/acl2s/acl2s" + + # Auto-active verifiers + "dafny|git|https://github.com/dafny-lang/dafny" + "why3|git|https://gitlab.inria.fr/why3/why3" + "boogie|git|https://github.com/boogie-org/boogie" + "cameleer|git|https://github.com/ocaml-gospel/cameleer" + "framac|git|https://git.frama-c.com/pub/frama-c" + "viper|git|https://github.com/viperproject/silver" + "key|git|https://github.com/KeYProject/key" + + # Model checkers / CI verifiers + "cbmc|git|https://github.com/diffblue/cbmc" + "seahorn|git|https://github.com/seahorn/seahorn" + "spin|git|https://github.com/nimble-code/Spin" + "prism|git|https://github.com/prismmodelchecker/prism-examples" + "nusmv|skip|http://nusmv.fbk.eu/distrib/" # distro ships examples + "uppaal|skip|https://uppaal.org/documentation/" # commercial; examples gated + "tlaplus_examples|git|https://github.com/tlaplus/Examples" + + # SAT / SMT / FO-ATP benchmarks + "abc|git|https://github.com/berkeley-abc/abc" + "dreal4|git|https://github.com/dreal/dreal4" + "smtlib|skip|https://smtlib.cs.uiowa.edu/benchmarks.shtml" # tarball index + "sat_benchmarks|skip|https://www.satcompetition.org/" # annual tarballs + "tptp|skip|https://www.tptp.org/" # form-free tarball + + # Security protocol verifiers + "tamarin|git|https://github.com/tamarin-prover/tamarin-prover" + "proverif|git|https://gitlab.inria.fr/bblanche/proverif" + + # Constraint / optimisation + "alloy|git|https://github.com/AlloyTools/models" + "minizinc|git|https://github.com/MiniZinc/minizinc-benchmarks" + + # Logic frameworks / misc + "dedukti|git|https://github.com/Deducteam/Libraries" + "elpi-lang|git|https://github.com/LPCIC/elpi" + "naproche|git|https://github.com/naproche/naproche" + "mercury-lang|git|https://github.com/Mercury-Language/mercury" + "nunchaku|git|https://github.com/nunchaku-inria/nunchaku" + "athena|git|https://github.com/AthenaFoundation/athena" +) + +# ------------------------------------------------------------------ + +usage() { + sed -n '4,25p' "${BASH_SOURCE[0]}" | sed 's/^# \{0,1\}//' +} + +list_catalogue() { + printf "%-28s %-5s %s\n" "NAME" "KIND" "URL" + printf "%-28s 
%-5s %s\n" "----" "----" "---" + for entry in "${CATALOGUE[@]}"; do + IFS='|' read -r name kind url _ <<<"$entry" + printf "%-28s %-5s %s\n" "$name" "$kind" "$url" + done +} + +show_status() { + local present=0 + local missing=0 + printf "%-28s %-9s %s\n" "NAME" "STATUS" "SIZE" + printf "%-28s %-9s %s\n" "----" "------" "----" + for entry in "${CATALOGUE[@]}"; do + IFS='|' read -r name kind _ _ <<<"$entry" + local path="$CORPORA_DIR/$name" + if [[ -d "$path" ]]; then + local size + size="$(du -sh "$path" 2>/dev/null | awk '{print $1}')" + printf "%-28s %-9s %s\n" "$name" "present" "$size" + present=$((present + 1)) + elif [[ "$kind" == "skip" ]]; then + printf "%-28s %-9s %s\n" "$name" "manual" "(gated upstream)" + else + printf "%-28s %-9s %s\n" "$name" "missing" "-" + missing=$((missing + 1)) + fi + done + echo + echo "Summary: $present present, $missing missing, $(( ${#CATALOGUE[@]} - present - missing )) manual." + echo "Root: $CORPORA_DIR" +} + +find_entry() { + local want="$1" + for entry in "${CATALOGUE[@]}"; do + IFS='|' read -r name _ _ _ <<<"$entry" + if [[ "$name" == "$want" ]]; then + echo "$entry" + return 0 + fi + done + return 1 +} + +provision_one() { + local entry="$1" + IFS='|' read -r name kind url _ <<<"$entry" + local dest="$CORPORA_DIR/$name" + + if [[ "$kind" == "skip" ]]; then + echo " [skip] $name: provision manually from $url" + return 0 + fi + + if [[ -d "$dest" && $FORCE -eq 0 ]]; then + echo " [ok] $name: already present at $dest" + return 0 + fi + + if [[ -d "$dest" && $FORCE -eq 1 ]]; then + echo " [force] removing $dest" + rm -rf "$dest" + fi + + mkdir -p "$CORPORA_DIR" + + case "$kind" in + git) + echo " [clone] $name <- $url" + if ! git clone --depth=1 --filter=blob:none "$url" "$dest" 2>&1 | tail -n 3; then + echo " [retry] $name" + sleep 2 + if ! git clone --depth=1 "$url" "$dest" 2>&1 | tail -n 3; then + echo " [FAIL] $name: clone failed; continuing" + return 1 + fi + fi + ;; + tar) + echo " [tar] $name <- $url" + mkdir -p "$dest" + if ! curl -fsSL "$url" | tar xz -C "$dest" --strip-components=1; then + echo " [FAIL] $name: tarball extraction failed; continuing" + return 1 + fi + ;; + *) + echo " [WARN] unknown kind '$kind' for $name" + return 1 + ;; + esac + echo " [done] $name" +} + +provision_all() { + local ok=0 fail=0 skip=0 + for entry in "${CATALOGUE[@]}"; do + IFS='|' read -r _ kind _ _ <<<"$entry" + if [[ "$kind" == "skip" ]]; then + skip=$((skip + 1)) + provision_one "$entry" || true + continue + fi + if provision_one "$entry"; then + ok=$((ok + 1)) + else + fail=$((fail + 1)) + fi + done + echo + echo "Provisioning complete: $ok ok, $fail failed, $skip manual." 
+}
+
+# ------------------------------------------------------------------
+
+case "${1:-}" in
+    ""|--help|-h)
+        usage
+        exit 0
+        ;;
+    --list)
+        list_catalogue
+        exit 0
+        ;;
+    --status)
+        show_status
+        exit 0
+        ;;
+    --all)
+        shift
+        [[ "${1:-}" == "--force" ]] && { FORCE=1; shift; }
+        provision_all
+        ;;
+    --force)
+        FORCE=1
+        shift
+        if [[ "${1:-}" == "--all" ]]; then
+            shift
+            provision_all
+        else
+            for name in "$@"; do
+                entry="$(find_entry "$name")" || { echo "unknown source: $name (run --list)"; exit 2; }
+                provision_one "$entry" || true
+            done
+        fi
+        ;;
+    *)
+        for name in "$@"; do
+            entry="$(find_entry "$name")" || { echo "unknown source: $name (run --list)"; exit 2; }
+            provision_one "$entry" || true
+        done
+        ;;
+esac

From 9f96287678a744fb6fb0307254f6a78c454ec8ed Mon Sep 17 00:00:00 2001
From: Claude
Date: Sun, 19 Apr 2026 22:35:29 +0000
Subject: =?UTF-8?q?feat(just):=20corpus-refresh=20pipeline=20?=
 =?UTF-8?q?=E2=80=94=20provision=20=E2=86=92=20extract=20=E2=86=92=20retra?=
 =?UTF-8?q?in?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Justfile

  Five new recipes that compose the scripts landed in 9df340c into an
  idiomatic end-to-end retrain flow:

    just provision-corpora [NAMES...]  # scripts/provision_corpora.sh
    just corpora-status                # on-disk report
    just extract-corpora [NAMES...]    # scripts/extract_all.sh
    just retrain                       # src/julia/run_training.jl
    just corpus-refresh                # provision → extract → retrain

  Each step is idempotent; re-running the pipeline only touches what
  has changed. The retrain recipe honours the env-var knobs added in
  5611cb2 (ECHIDNA_MAX_PROOF_STATES, ECHIDNA_NUM_EPOCHS,
  ECHIDNA_NUM_NEGATIVES), so:

    ECHIDNA_MAX_PROOF_STATES=0 just corpus-refresh

  re-baselines MRR against the full expanded corpus on GPU hardware.
---
 Justfile | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/Justfile b/Justfile
index a34e4b0..0c6b437 100644
--- a/Justfile
+++ b/Justfile
@@ -74,6 +74,40 @@ vocab-canon:
     julia scripts/vocabulary_mine_corpus.jl
     julia scripts/vocabulary_canonicalize.jl
 
+# ── Corpus / training pipeline ─────────────────────────────────
+#
+# Three steps in order: provision upstream mirrors, run every extractor,
+# then retrain. Each step is idempotent; re-running only touches what
+# has changed. See scripts/provision_corpora.sh --list for the source
+# catalogue.
+
+# Clone every upstream prover corpus into external_corpora/.
+# Pass specific names to provision a subset: `just provision-corpora metamath mathlib4`.
+provision-corpora *NAMES:
+    @if [ -z "{{NAMES}}" ]; then \
+        scripts/provision_corpora.sh --all; \
+    else \
+        scripts/provision_corpora.sh {{NAMES}}; \
+    fi
+
+# Report which upstream corpora are on disk and their sizes.
+corpora-status:
+    scripts/provision_corpora.sh --status
+
+# Run every scripts/extract_*.jl against the provisioned corpora.
+# Pass names to run a subset: `just extract-corpora metamath mathlib4`.
+extract-corpora *NAMES:
+    scripts/extract_all.sh {{NAMES}}
+
+# Full retrain from provisioned corpora. Honours ECHIDNA_MAX_PROOF_STATES
+# (0 = unlimited), ECHIDNA_NUM_EPOCHS, ECHIDNA_NUM_NEGATIVES.
+retrain:
+    julia --project=src/julia src/julia/run_training.jl
+
+# End-to-end pipeline: provision → extract → retrain.
+# Use `ECHIDNA_MAX_PROOF_STATES=0 just corpus-refresh` to lift the sample cap.
+corpus-refresh: provision-corpora extract-corpora retrain
+
 # Run the eight-axis metrics suite against the current corpus and post
 # results to VeriSimDB. Falls back to training_data/metrics_<date>.jsonl
 # if VERISIM_URL is unreachable. Target values are documented in

From 7f9f62099bcf16c86ec316b5d21c037b4ba86771 Mon Sep 17 00:00:00 2001
From: Claude
Date: Sun, 19 Apr 2026 22:52:18 +0000
Subject: [PATCH 4/7] feat(verisim): first-class Proof / TacticApplication /
 ProofVersion / ProofStateRecord
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

src/rust/verification/proof.rs

  New module (verisim-feature-gated) materialising the four entities
  the E-R audit flagged as missing from the Verisim schema. Each closes
  one of the FK-discipline gaps identified in the 2026-04-19 audit:

  * Proof{id, goal_hash, theorem_hash, theorem_name, prover, first_seen}
    makes the SHA-256 digest already produced by
    proof_encoding::proof_identity() a first-class row. theorem_hash
    (new helper theorem_identity) and goal_hash let cross-prover and
    cross-goal joins index on fixed-width columns without re-deriving
    the triple from the primary key.

  * TacticApplication{proof_id, step, tactic, goal_hash, status,
    goals_after, duration_ms, applied_at} persists the proof DAG.
    Success/Closed/Error TacticStatus is kept (not just success) so
    MCTS/agentic search retains failed-branch training signal.

  * ProofVersion{proof_id, version, snapshot_id, goals_remaining,
    is_qed, actor, recorded_at} is the append-only audit log that
    OctadPayload.temporal.versions currently lacks — the latter is
    render-only; this is durable. `initial()` and `next()` chain
    monotonically so versions never collide.

  * ProofStateRecord{proof_id, step, goal_hash, prover, prover_version,
    state_cbor_b64, features, relevant_premises, label_confidence,
    source} is the typed corpus schema replacing the ad-hoc JSONL dict.
    state_cbor_b64 round-trips the tree structure the JSONL format
    loses; label_confidence separates human-verified ground truth from
    weak heuristics; prover_version attributes MRR deltas to solver
    builds.

  The module adds 8 unit tests (hash-derivation correctness, FK wiring,
  monotonic version chaining, QED marking, round-trip serde). Existing
  OctadPayload posting continues unchanged — these rows are intended to
  be written alongside, not instead of, the octad, via an incremental
  migration path.

src/rust/verification/mod.rs

  Export the new types behind the existing `verisim` feature gate,
  matching how proof_encoding is gated.

src/rust/proof_encoding.rs + src/rust/verisim_bridge.rs

  Drive-by fix: the `verisim` feature tests referenced ProverKind::Lean4
  (doesn't exist; the Lean 4 variant is ProverKind::Lean, Lean 3 is the
  sibling ProverKind::Lean3) and a hardcoded "Lean4" Debug string.
  Restoring the 27 previously-uncompilable verisim-feature tests so the
  new proof.rs tests can share the harness.

Test counts: 613 default, 640 with --features verisim (0 failures).
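Write-path sketch (the entity API below is from this patch; the
surrounding bindings and the `post_rows` sink are hypothetical, shown
only to make the FK wiring concrete):

    let proof = Proof::new("Nat.add_comm", &goal, ProverKind::Lean);
    let mut version = ProofVersion::initial(&proof, &state, "echidna-auto");
    let app = TacticApplication::record(
        &proof, 0, &goal, &Tactic::Reflexivity,
        TacticStatus::Closed, /* goals_after */ 0, /* duration_ms */ 12,
    );
    version = ProofVersion::next(&version, &state_after, "echidna-auto");
    // post_rows(&proof, &app, &version);  // alongside the OctadPayload

Every hash in the three rows is derived inside the constructors, so a
caller cannot post a TacticApplication whose proof_id disagrees with
its goal_hash.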
--- src/rust/proof_encoding.rs | 6 +- src/rust/verification/mod.rs | 6 + src/rust/verification/proof.rs | 390 +++++++++++++++++++++++++++++++++ src/rust/verisim_bridge.rs | 11 +- 4 files changed, 405 insertions(+), 8 deletions(-) create mode 100644 src/rust/verification/proof.rs diff --git a/src/rust/proof_encoding.rs b/src/rust/proof_encoding.rs index c04aaa0..ebfdd91 100644 --- a/src/rust/proof_encoding.rs +++ b/src/rust/proof_encoding.rs @@ -154,8 +154,8 @@ mod tests { hypotheses: vec![], }; - let id1 = proof_identity("my_theorem", &goal, ProverKind::Lean4); - let id2 = proof_identity("my_theorem", &goal, ProverKind::Lean4); + let id1 = proof_identity("my_theorem", &goal, ProverKind::Lean); + let id2 = proof_identity("my_theorem", &goal, ProverKind::Lean); assert_eq!(id1, id2, "Same inputs must produce same identity"); } @@ -167,7 +167,7 @@ mod tests { hypotheses: vec![], }; - let lean = proof_identity("thm", &goal, ProverKind::Lean4); + let lean = proof_identity("thm", &goal, ProverKind::Lean); let coq = proof_identity("thm", &goal, ProverKind::Coq); assert_ne!( lean, coq, diff --git a/src/rust/verification/mod.rs b/src/rust/verification/mod.rs index 4adb299..2cad8f5 100644 --- a/src/rust/verification/mod.rs +++ b/src/rust/verification/mod.rs @@ -16,6 +16,8 @@ pub mod confidence; pub mod mutation; pub mod pareto; pub mod portfolio; +#[cfg(feature = "verisim")] +pub mod proof; pub mod statistics; pub use axiom_tracker::{AxiomPolicy, AxiomTracker, AxiomUsage, DangerLevel}; @@ -24,4 +26,8 @@ pub use confidence::TrustLevel; pub use mutation::{MutationKind, MutationResult, MutationTester}; pub use pareto::{ParetoFrontier, ProofCandidate, ProofObjective}; pub use portfolio::{PortfolioConfig, PortfolioResult, PortfolioSolver}; +#[cfg(feature = "verisim")] +pub use proof::{ + theorem_identity, Proof, ProofStateRecord, ProofVersion, TacticApplication, TacticStatus, +}; pub use statistics::StatisticsTracker; diff --git a/src/rust/verification/proof.rs b/src/rust/verification/proof.rs new file mode 100644 index 0000000..ea631a1 --- /dev/null +++ b/src/rust/verification/proof.rs @@ -0,0 +1,390 @@ +// SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell (hyperpolymath) +// SPDX-License-Identifier: PMPL-1.0-or-later + +//! Persistent proof entities for the Verisim E-R schema. +//! +//! The existing `proof_encoding::proof_identity` function produces a stable +//! SHA-256 hex digest for every `(theorem, goal, prover)` triple, but that +//! digest lives only as `OctadPayload.key`. Downstream entities +//! (ProofAttempt, PortfolioResult, MutationResult, ParetoEntry, certificates, +//! integrity records, training corpora) cannot FK into a `Proof` row because +//! no `Proof` row exists — they each carry their own loose references to +//! theorem names and prover strings. This module materialises the missing +//! entities so the proof territory has an actual map: +//! +//! * [`Proof`] — stable primary key with explicit +//! `theorem_hash` and `goal_hash` for cross-prover +//! and cross-goal joins. +//! * [`TacticApplication`] — one row per tactic step; makes the proof DAG +//! reconstructable. +//! * [`ProofVersion`] — append-only snapshot log so the temporal +//! evolution of a proof is durable, not just +//! renderable in `OctadPayload.temporal`. +//! * [`ProofStateRecord`] — typed training-corpus schema replacing the +//! untyped JSONL dict. +//! +//! All four types serialise to JSON/CBOR and are intended to be posted to +//! Verisim alongside — not instead of — the existing `OctadPayload`. An +//! 
incremental migration strategy keeps the current octad-posting path +//! working while downstream consumers start reading the new FK-structured +//! tables. + +use chrono::Utc; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; + +use crate::core::{Goal, ProofState, Tactic}; +use crate::proof_encoding::{goal_identity, proof_identity, session_identity}; +use crate::provers::ProverKind; + +// ═══════════════════════════════════════════════════════════════════════ +// Theorem identity — rename-stable hash of the theorem name. +// ═══════════════════════════════════════════════════════════════════════ + +/// SHA-256 hex digest of the theorem name, used as `Proof.theorem_hash`. +/// +/// The explicit hash lets queries like "all proofs of theorem X" index on +/// a fixed-width column instead of a variable-length name string. Rename +/// drift still affects the hash — if the *name* changes, the hash changes — +/// but that is the correct semantics: a renamed theorem is a different +/// entity from the E-R perspective. +pub fn theorem_identity(theorem_name: &str) -> String { + let input = format!("echidna:v1:theorem:{}", theorem_name); + format!("{:x}", Sha256::digest(input.as_bytes())) +} + +// ═══════════════════════════════════════════════════════════════════════ +// Proof — first-class entity materialising the identity triple. +// ═══════════════════════════════════════════════════════════════════════ + +/// A proof of a specific goal by a specific prover of a specific theorem. +/// +/// `id` is the primary key: the SHA-256 digest produced by +/// `proof_encoding::proof_identity`, identical to `OctadPayload.key`. Same +/// `(theorem, goal, prover)` triple always produces the same id, which is +/// what makes the entity content-addressed. +/// +/// `theorem_hash` and `goal_hash` are independent digests kept alongside the +/// primary key so queries can join on them directly without re-deriving +/// the triple from `id`. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct Proof { + /// Primary key — `proof_encoding::proof_identity` hex digest. + pub id: String, + /// `goal_identity` digest (prover-agnostic, cross-prover joins). + pub goal_hash: String, + /// `theorem_identity` digest. + pub theorem_hash: String, + /// Human-readable theorem name (display only; joins use `theorem_hash`). + pub theorem_name: String, + /// Prover that produced this proof. + pub prover: ProverKind, + /// ISO-8601 timestamp when the row was first materialised. + pub first_seen: String, +} + +impl Proof { + /// Derive a `Proof` from its source triple. All three hashes are + /// computed here so no caller can supply a mismatched set. + pub fn new(theorem_name: &str, goal: &Goal, prover: ProverKind) -> Self { + Proof { + id: proof_identity(theorem_name, goal, prover), + goal_hash: goal_identity(theorem_name, goal), + theorem_hash: theorem_identity(theorem_name), + theorem_name: theorem_name.to_string(), + prover, + first_seen: Utc::now().to_rfc3339(), + } + } +} + +// ═══════════════════════════════════════════════════════════════════════ +// TacticApplication — per-step record of the proof DAG. +// ═══════════════════════════════════════════════════════════════════════ + +/// Outcome of a tactic application. +/// +/// MCTS and agentic search gain training signal from failed applications as +/// well as successful ones, so the schema persists both. 
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum TacticStatus { + /// Tactic produced a new state; further goals may remain. + Success, + /// Tactic closed every remaining goal (QED). + Closed, + /// Tactic failed; the error message is preserved verbatim. + Error(String), +} + +/// A single tactic applied to a state, with the status and the post-step +/// goal count. Rows are indexed by `proof_id` and ordered by `step` so the +/// full tactic DAG of a proof can be reconstructed. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TacticApplication { + /// FK → [`Proof::id`]. + pub proof_id: String, + /// Monotonic step index (0-based) within the proof. + pub step: u32, + /// The tactic that was applied. + pub tactic: Tactic, + /// `goal_identity` digest of the goal the tactic targeted. + pub goal_hash: String, + /// Outcome of the application. + pub status: TacticStatus, + /// Number of goals remaining after the tactic (0 = QED for this branch). + pub goals_after: u32, + /// Wall-clock duration for the application, in milliseconds. + pub duration_ms: u64, + /// ISO-8601 timestamp when the tactic was applied. + pub applied_at: String, +} + +impl TacticApplication { + /// Build a `TacticApplication` row for a step that closed some subset of + /// the goal tree. `goal` identifies which specific goal was targeted; + /// `goals_after` is taken from the post-tactic state. + pub fn record( + proof: &Proof, + step: u32, + goal: &Goal, + tactic: &Tactic, + status: TacticStatus, + goals_after: u32, + duration_ms: u64, + ) -> Self { + TacticApplication { + proof_id: proof.id.clone(), + step, + tactic: tactic.clone(), + goal_hash: goal_identity(&proof.theorem_name, goal), + status, + goals_after, + duration_ms, + applied_at: Utc::now().to_rfc3339(), + } + } +} + +// ═══════════════════════════════════════════════════════════════════════ +// ProofVersion — append-only snapshot log. +// ═══════════════════════════════════════════════════════════════════════ + +/// Append-only snapshot of a proof's state at a point in time. +/// +/// `OctadPayload.temporal.versions` is render-only: it reflects the current +/// in-memory state of the proof. Persisting `ProofVersion` rows gives a +/// durable audit log that survives solver restarts and lets Verisim replay +/// how a proof evolved. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProofVersion { + /// FK → [`Proof::id`]. + pub proof_id: String, + /// Monotonic version number (0 = initial state). + pub version: u32, + /// `session_identity` digest unique per `(proof_id, version)`. + pub snapshot_id: String, + /// Number of goals remaining in this snapshot. + pub goals_remaining: u32, + /// Whether this snapshot closes the proof. + pub is_qed: bool, + /// Actor: e.g. `"echidna-auto"`, `"mcts"`, `"user:jdaj"`. + pub actor: String, + /// ISO-8601 timestamp. + pub recorded_at: String, +} + +impl ProofVersion { + /// Initial snapshot (`version = 0`) of a newly-created proof. + pub fn initial(proof: &Proof, state: &ProofState, actor: &str) -> Self { + let ts = Utc::now().timestamp(); + let seed = format!("{}:v0", proof.id); + ProofVersion { + proof_id: proof.id.clone(), + version: 0, + snapshot_id: session_identity(&seed, ts), + goals_remaining: state.goals.len() as u32, + is_qed: state.is_complete(), + actor: actor.to_string(), + recorded_at: Utc::now().to_rfc3339(), + } + } + + /// Successor snapshot; `prev` supplies the `proof_id` and version + /// counter. 
Passing a stale `prev` will produce a correctly-chained + /// `version + 1` regardless of real-world timing drift. + pub fn next(prev: &ProofVersion, state: &ProofState, actor: &str) -> Self { + let ts = Utc::now().timestamp(); + let next_version = prev.version + 1; + let seed = format!("{}:v{}", prev.proof_id, next_version); + ProofVersion { + proof_id: prev.proof_id.clone(), + version: next_version, + snapshot_id: session_identity(&seed, ts), + goals_remaining: state.goals.len() as u32, + is_qed: state.is_complete(), + actor: actor.to_string(), + recorded_at: Utc::now().to_rfc3339(), + } + } +} + +// ═══════════════════════════════════════════════════════════════════════ +// ProofStateRecord — typed corpus schema. +// ═══════════════════════════════════════════════════════════════════════ + +/// Typed training-corpus record. +/// +/// Replaces the ad-hoc JSONL dict the Julia extractors emit today. A +/// `ProofStateRecord` is one example per `(proof_id, step)` pair and +/// carries everything the trainer needs without per-prover divergence: +/// +/// * `proof_id` + `step` connect the record back to the `Proof` and +/// `TacticApplication` rows for provenance. +/// * `prover_version` captures the solver build so MRR deltas across +/// versions can be attributed. +/// * `state_cbor_b64` is a round-trippable `ProofState` blob so the +/// trainer can reconstruct the pre-tactic state exactly — the JSONL +/// format loses tree structure. +/// * `label_confidence` separates human-verified ground truth from +/// weakly-labelled heuristics. +/// +/// Existing extractors can keep emitting legacy JSONL; new extractors +/// should emit this schema so downstream consumers gain FK discipline. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProofStateRecord { + /// FK → [`Proof::id`]. + pub proof_id: String, + /// Matches [`TacticApplication::step`]. + pub step: u32, + /// `goal_identity` digest for this step. + pub goal_hash: String, + /// Lowercase prover tag, matching `prover_kind_to_str`. + pub prover: String, + /// Solver version string, e.g. `"4.12.0"`. Empty when unknown. + pub prover_version: String, + /// CBOR-encoded `ProofState`, base64-armored for JSONL transport. + pub state_cbor_b64: String, + /// Optional pre-computed feature vector. `None` if the extractor did + /// not embed features and the trainer is expected to derive them. + pub features: Option>, + /// Relevant premise names — the label for premise-selection training. + pub relevant_premises: Vec, + /// Confidence in the label (0.0 = heuristic, 1.0 = human-verified). + pub label_confidence: f32, + /// Upstream source path, e.g. `"external_corpora/mathlib4/Mathlib/..."`. 
+ pub source: String, +} + +// ═══════════════════════════════════════════════════════════════════════ +// Tests +// ═══════════════════════════════════════════════════════════════════════ + +#[cfg(test)] +mod tests { + use super::*; + use crate::core::Term; + + fn sample_goal() -> Goal { + Goal { + id: "g0".to_string(), + target: Term::Const("True".to_string()), + hypotheses: vec![], + } + } + + #[test] + fn proof_id_matches_proof_identity() { + let goal = sample_goal(); + let p = Proof::new("mythm", &goal, ProverKind::Lean); + assert_eq!(p.id, proof_identity("mythm", &goal, ProverKind::Lean)); + } + + #[test] + fn proof_hashes_are_distinct() { + let goal = sample_goal(); + let p = Proof::new("mythm", &goal, ProverKind::Lean); + assert_ne!(p.id, p.goal_hash); + assert_ne!(p.id, p.theorem_hash); + assert_ne!(p.goal_hash, p.theorem_hash); + } + + #[test] + fn theorem_identity_is_deterministic() { + assert_eq!(theorem_identity("foo"), theorem_identity("foo")); + assert_ne!(theorem_identity("foo"), theorem_identity("bar")); + assert_eq!(theorem_identity("foo").len(), 64); + } + + #[test] + fn proof_cross_prover_share_goal_and_theorem_hash() { + let goal = sample_goal(); + let lean = Proof::new("thm", &goal, ProverKind::Lean); + let coq = Proof::new("thm", &goal, ProverKind::Coq); + assert_ne!(lean.id, coq.id); + assert_eq!(lean.goal_hash, coq.goal_hash); + assert_eq!(lean.theorem_hash, coq.theorem_hash); + } + + #[test] + fn tactic_application_records_fk() { + let goal = sample_goal(); + let p = Proof::new("thm", &goal, ProverKind::Z3); + let tac = Tactic::Reflexivity; + let app = TacticApplication::record(&p, 0, &goal, &tac, TacticStatus::Closed, 0, 12); + assert_eq!(app.proof_id, p.id); + assert_eq!(app.step, 0); + assert_eq!(app.goals_after, 0); + assert_eq!(app.status, TacticStatus::Closed); + assert_eq!(app.goal_hash, goal_identity("thm", &goal)); + } + + #[test] + fn proof_version_chains_monotonically() { + let goal = sample_goal(); + let p = Proof::new("thm", &goal, ProverKind::Z3); + let state = ProofState::new(goal.target.clone()); + let v0 = ProofVersion::initial(&p, &state, "echidna-auto"); + let v1 = ProofVersion::next(&v0, &state, "echidna-auto"); + let v2 = ProofVersion::next(&v1, &state, "echidna-auto"); + assert_eq!(v0.version, 0); + assert_eq!(v1.version, 1); + assert_eq!(v2.version, 2); + assert_eq!(v0.proof_id, p.id); + assert_eq!(v1.proof_id, p.id); + assert_ne!(v0.snapshot_id, v1.snapshot_id); + } + + #[test] + fn proof_version_marks_qed_when_state_is_complete() { + let goal = sample_goal(); + let p = Proof::new("thm", &goal, ProverKind::Z3); + let open = ProofState::new(goal.target.clone()); + let closed = ProofState::default(); + let v_open = ProofVersion::initial(&p, &open, "mcts"); + let v_closed = ProofVersion::next(&v_open, &closed, "mcts"); + assert!(!v_open.is_qed); + assert!(v_closed.is_qed); + assert_eq!(v_closed.goals_remaining, 0); + } + + #[test] + fn proof_state_record_round_trips_via_serde() { + let rec = ProofStateRecord { + proof_id: "abc".to_string(), + step: 3, + goal_hash: "def".to_string(), + prover: "lean".to_string(), + prover_version: "4.12.0".to_string(), + state_cbor_b64: "Zm9v".to_string(), + features: Some(vec![0.1, 0.2, 0.3]), + relevant_premises: vec!["Nat.add_comm".to_string()], + label_confidence: 0.9, + source: "external_corpora/mathlib4/foo.lean".to_string(), + }; + let json = serde_json::to_string(&rec).unwrap(); + let back: ProofStateRecord = serde_json::from_str(&json).unwrap(); + assert_eq!(back.proof_id, rec.proof_id); + 
assert_eq!(back.features, rec.features); + assert!((back.label_confidence - rec.label_confidence).abs() < 1e-6); + } +} diff --git a/src/rust/verisim_bridge.rs b/src/rust/verisim_bridge.rs index 2ad22d9..973e501 100644 --- a/src/rust/verisim_bridge.rs +++ b/src/rust/verisim_bridge.rs @@ -252,7 +252,7 @@ pub struct SpatialPayload { /// /// Usage: /// ```ignore -/// let octad = ProofOctadBuilder::new("my_theorem", &goal, ProverKind::Lean4) +/// let octad = ProofOctadBuilder::new("my_theorem", &goal, ProverKind::Lean) /// .with_proof_state(&proof_state) /// .with_axioms(vec!["Classical.em".to_string()]) /// .with_aspects(vec!["logic".to_string()]) @@ -921,7 +921,7 @@ mod tests { let goal = sample_goal(); let proof = sample_proof_state(); - let octad = ProofOctadBuilder::new("nat_add_zero", &goal, ProverKind::Lean4) + let octad = ProofOctadBuilder::new("nat_add_zero", &goal, ProverKind::Lean) .with_proof_state(&proof) .with_axioms(vec!["Nat.rec".to_string()]) .with_aspects(vec!["arithmetic".to_string(), "induction".to_string()]) @@ -932,9 +932,10 @@ mod tests { // Key should be a 64-char hex digest assert_eq!(octad.key.len(), 64); - // Semantic modality + // Semantic modality (ProverKind::Lean is the Lean 4 variant; Lean 3 + // is a sibling ProverKind::Lean3.) assert_eq!(octad.semantic.status, ProofStatus::Complete); - assert_eq!(octad.semantic.prover, "Lean4"); + assert_eq!(octad.semantic.prover, "Lean"); assert!(!octad.semantic.proof_blob_b64.is_empty()); assert_eq!(octad.semantic.axioms_used, vec!["Nat.rec"]); @@ -964,7 +965,7 @@ mod tests { // Graph modality assert_eq!(octad.graph.cross_prover_id.len(), 64); - assert!(octad.graph.prover_id.contains("Lean4")); + assert!(octad.graph.prover_id.contains("Lean")); // Tensor modality assert_eq!(*octad.tensor.metrics.get("time_ms").unwrap(), 42.0); From 13116fc1a0dbff6a561b763b679ec2f32faed12a Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 20 Apr 2026 06:17:55 +0000 Subject: [PATCH 5/7] feat(corpus): align premise proof_ids with UNIFIED after merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scripts/align_premises.jl New Julia script that reconciles the three-way id-space bug found during today's retrain: * per-prover extractors emit proof_state.id and matching premise.proof_id sharing a local integer id space * scripts/merge_corpus.jl:350-352 dedupes proof_states by (prover, theorem) and rewrites every id to a fresh sequential counter 1..N in proof_states_UNIFIED.jsonl * the premise files retain their original extractor ids, so the dataloader's proof_id join matches ~0% of records — today's retrain saw 1 392/50 000 = 2.8% coverage Fix: rebuild premises_COMPLETE.jsonl keyed on (canonical_prover, theorem) — the same pair merge_corpus.jl dedupes on, which is durable across the id rewrite. For every premise record, look up the fresh id assigned by the merge and rewrite proof_id := UNIFIED_id. Records whose theorem is not in UNIFIED get dropped and counted. Expected coverage jump: 2.8% → ~60–80% of proof_states with at least one matched premise, ~20-30x more training examples. Canonicalisation table held in lockstep with merge_corpus.jl's PROVER_CANONICAL. Stats written to premises_ALIGNED_stats.json alongside the output. Justfile Add `merge-corpora` (runs scripts/merge_corpus.jl) and `align-premises` (runs the new script) recipes and insert them into the corpus-refresh chain between extract-corpora and retrain, so the full pipeline is now: provision → extract → merge → align → retrain. 
Individual recipes remain callable for targeted work. --- Justfile | 27 ++++- scripts/align_premises.jl | 249 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 270 insertions(+), 6 deletions(-) create mode 100755 scripts/align_premises.jl diff --git a/Justfile b/Justfile index 0c6b437..3d52c6b 100644 --- a/Justfile +++ b/Justfile @@ -76,10 +76,11 @@ vocab-canon: # ── Corpus / training pipeline ───────────────────────────────── # -# Three steps in order: provision upstream mirrors, run every extractor, -# then retrain. Each step is idempotent; re-running only touches what -# has changed. See scripts/provision_corpora.sh --list for the source -# catalogue. +# Five steps in order: provision upstream mirrors, run every extractor, +# merge per-prover proof_states into UNIFIED, align the premise files +# to UNIFIED's fresh ids, then retrain. Each step is idempotent; +# re-running only touches what has changed. See +# scripts/provision_corpora.sh --list for the source catalogue. # Clone every upstream prover corpus into external_corpora/. # Pass specific names to provision a subset: `just provision-corpora metamath mathlib4`. @@ -99,14 +100,28 @@ corpora-status: extract-corpora *NAMES: scripts/extract_all.sh {{NAMES}} +# Merge per-prover proof_states_*.jsonl into proof_states_UNIFIED.jsonl +# with fresh sequential ids (dedupes by prover+theorem). +merge-corpora: + julia --project=src/julia scripts/merge_corpus.jl + +# Rebuild premises_COMPLETE.jsonl with proof_ids that match UNIFIED. +# merge-corpora rewrites every proof_state id to a fresh sequential +# counter; the premise files keep the original extractor ids, so this +# step re-joins premises to UNIFIED via (prover, theorem) — the durable +# key merge_corpus.jl already dedupes on. Without this step the +# dataloader's proof_id join matches ~0% of records. +align-premises: + julia --project=src/julia scripts/align_premises.jl + # Full retrain from provisioned corpora. Honours ECHIDNA_MAX_PROOF_STATES # (0 = unlimited), ECHIDNA_NUM_EPOCHS, ECHIDNA_NUM_NEGATIVES. retrain: julia --project=src/julia src/julia/run_training.jl -# End-to-end pipeline: provision → extract → retrain. +# End-to-end pipeline: provision → extract → merge → align → retrain. # Use `ECHIDNA_MAX_PROOF_STATES=0 just corpus-refresh` to lift the sample cap. -corpus-refresh: provision-corpora extract-corpora retrain +corpus-refresh: provision-corpora extract-corpora merge-corpora align-premises retrain # Run the eight-axis metrics suite against the current corpus and post # results to VeriSimDB. Falls back to training_data/metrics_.jsonl diff --git a/scripts/align_premises.jl b/scripts/align_premises.jl new file mode 100755 index 0000000..4620329 --- /dev/null +++ b/scripts/align_premises.jl @@ -0,0 +1,249 @@ +#!/usr/bin/env julia +# SPDX-License-Identifier: PMPL-1.0-or-later +# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# align_premises.jl — rebuild training_data/premises_COMPLETE.jsonl with proof_ids +# that match the fresh sequential ids written by merge_corpus.jl into +# training_data/proof_states_UNIFIED.jsonl. 
+#
+# Background
+# ----------
+# The per-prover extractors (scripts/extract_*.jl and scripts/max_extract_*.jl)
+# emit proof_states and premises sharing a per-extractor-local integer id space:
+#
+#     {"id": 60123, "prover": "Lean", "theorem": "Foo.bar", ...}       (proof_state)
+#     {"proof_id": 60123, "prover": "Lean", "theorem": "Foo.bar", ...} (premise)
+#
+# scripts/merge_corpus.jl then writes proof_states_UNIFIED.jsonl after dedup
+# by (prover, theorem) and **rewrites every proof_state's id to a fresh
+# sequential counter (1..N)**. The premise files retain the original
+# extractor ids, so proof_state.id and premise.proof_id no longer share an id
+# space — the dataloader's join by proof_id matches ~0% of records.
+#
+# This script rebuilds premises_COMPLETE.jsonl using the
+# (canonical_prover, theorem) pair as the durable join key (which merge_corpus
+# already guarantees is unique within UNIFIED). For every premise record,
+# we look up the fresh id assigned by merge_corpus and rewrite
+# `proof_id := UNIFIED_id`. Records whose (prover, theorem) does not appear
+# in UNIFIED are dropped and counted.
+#
+# Usage
+# -----
+#   julia --project=src/julia scripts/align_premises.jl
+#
+# Outputs
+# -------
+#   training_data/premises_COMPLETE.jsonl       — aligned premises
+#   training_data/premises_ALIGNED_stats.json   — coverage report
+#
+# Run this after merge_corpus.jl and before run_training.jl.

+using JSON3
+
+# ---------------------------------------------------------------------------
+# Paths
+# ---------------------------------------------------------------------------
+
+const REPO_ROOT = dirname(dirname(abspath(@__FILE__)))
+const TRAINING_DIR = joinpath(REPO_ROOT, "training_data")
+
+const UNIFIED_PATH = joinpath(TRAINING_DIR, "proof_states_UNIFIED.jsonl")
+const OUT_PATH = joinpath(TRAINING_DIR, "premises_COMPLETE.jsonl")
+const STATS_PATH = joinpath(TRAINING_DIR, "premises_ALIGNED_stats.json")
+
+# Every premises_*.jsonl in training_data/ is a candidate source, except the
+# output file itself and any ALIGNED_stats JSON we emit. We also skip the
+# timestamped `premises_<YYYY-MM-DD>.jsonl` snapshots by default but include
+# them when ECHIDNA_ALIGN_INCLUDE_DATED=1.
+function collect_premise_sources()
+    sources = String[]
+    for f in readdir(TRAINING_DIR; join=true)
+        base = basename(f)
+        startswith(base, "premises_") || continue
+        endswith(base, ".jsonl") || continue
+        base == "premises_COMPLETE.jsonl" && continue
+        if get(ENV, "ECHIDNA_ALIGN_INCLUDE_DATED", "0") != "1" &&
+           occursin(r"^premises_\d{4}-\d{2}-\d{2}\.jsonl$", base)
+            continue
+        end
+        push!(sources, f)
+    end
+    sort!(sources)
+    return sources
+end
+
+# ---------------------------------------------------------------------------
+# Prover-name canonicalisation — must match merge_corpus.jl's mapping.
+# Keep this in lockstep with scripts/merge_corpus.jl's PROVER_CANONICAL.
+# ---------------------------------------------------------------------------
+
+const PROVER_CANONICAL = Dict(
+    "Lean4" => "Lean",
+    "Rocq" => "Coq",
+    "Fstar" => "F*",
+    "CVC4" => "CVC5",
+    "AltErgo" => "Alt-Ergo",
+    "HOL Light" => "HOLLight",
+    "E Prover" => "EProver",
+    "OR-Tools" => "ORTools",
+)
+
+canon(prover::AbstractString) = get(PROVER_CANONICAL, String(prover), String(prover))
+
+# ---------------------------------------------------------------------------
+# Build the (canonical_prover, theorem) → fresh_id index from UNIFIED.
+# --------------------------------------------------------------------------- + +function build_unified_index(path::String) + isfile(path) || error("missing $(path); run scripts/merge_corpus.jl first.") + index = Dict{Tuple{String,String},Int}() + open(path, "r") do fh + for line in eachline(fh) + isempty(strip(line)) && continue + entry = try + JSON3.read(line, Dict{String,Any}) + catch + continue + end + prover = canon(get(entry, "prover", "")) + theorem = String(get(entry, "theorem", "")) + new_id = get(entry, "id", nothing) + (isempty(prover) || isempty(theorem) || new_id === nothing) && continue + key = (prover, theorem) + # First-write wins. merge_corpus already dedupes by (prover,theorem) + # so collisions here would indicate a merge bug — report and keep + # the earlier id. + if haskey(index, key) && index[key] != new_id + @warn "duplicate (prover,theorem) in UNIFIED" prover=prover theorem=theorem first=index[key] second=new_id + continue + end + index[key] = Int(new_id) + end + end + return index +end + +# --------------------------------------------------------------------------- +# Walk every premise source, rewrite proof_id, and emit premises_COMPLETE. +# --------------------------------------------------------------------------- + +function align_premises(index::Dict{Tuple{String,String},Int}, sources::Vector{String}) + written = 0 + dropped_no_theorem = 0 + dropped_no_match = 0 + dropped_malformed = 0 + per_source = Dict{String,Dict{String,Int}}() + + open(OUT_PATH, "w") do out + for src in sources + s_written = 0 + s_dropped = 0 + open(src, "r") do fh + for line in eachline(fh) + isempty(strip(line)) && continue + rec = try + JSON3.read(line, Dict{String,Any}) + catch + dropped_malformed += 1 + s_dropped += 1 + continue + end + prover = canon(get(rec, "prover", "")) + theorem = String(get(rec, "theorem", "")) + if isempty(prover) || isempty(theorem) + dropped_no_theorem += 1 + s_dropped += 1 + continue + end + new_id = get(index, (prover, theorem), nothing) + if new_id === nothing + dropped_no_match += 1 + s_dropped += 1 + continue + end + rec["proof_id"] = new_id + println(out, JSON3.write(rec)) + written += 1 + s_written += 1 + end + end + per_source[basename(src)] = Dict( + "written" => s_written, + "dropped" => s_dropped, + ) + end + end + + return ( + written = written, + dropped_no_theorem = dropped_no_theorem, + dropped_no_match = dropped_no_match, + dropped_malformed = dropped_malformed, + per_source = per_source, + ) +end + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +function main() + println("align_premises — reconcile premise proof_ids with proof_states_UNIFIED") + println("=" ^ 72) + + println("\nBuilding (prover, theorem) → fresh_id index from UNIFIED...") + index = build_unified_index(UNIFIED_PATH) + println(" $(length(index)) (prover, theorem) pairs loaded.") + + sources = collect_premise_sources() + println("\nPremise sources ($(length(sources)) files):") + for s in sources + println(" $(basename(s))") + end + + println("\nAligning...") + result = align_premises(index, sources) + + total_in = result.written + result.dropped_no_theorem + + result.dropped_no_match + result.dropped_malformed + match_rate = total_in == 0 ? 
0.0 : 100.0 * result.written / total_in + + println("\nResults") + println("-" ^ 72) + println(" Wrote : $(result.written) records to $(OUT_PATH)") + println(" Dropped (no key) : $(result.dropped_no_theorem) (missing prover or theorem)") + println(" Dropped (no match): $(result.dropped_no_match) (theorem not in UNIFIED)") + println(" Dropped (bad json): $(result.dropped_malformed)") + println(" Match rate : $(round(match_rate; digits=2))%") + + println("\nPer source:") + for (name, counts) in sort(collect(result.per_source); by = p -> p[1]) + println(" $(rpad(name, 40)) written=$(counts["written"]) dropped=$(counts["dropped"])") + end + + # Persist a machine-readable stats file alongside the output. + stats = Dict( + "unified_path" => UNIFIED_PATH, + "output_path" => OUT_PATH, + "unified_index_size" => length(index), + "sources" => [basename(s) for s in sources], + "written" => result.written, + "dropped_no_theorem" => result.dropped_no_theorem, + "dropped_no_match" => result.dropped_no_match, + "dropped_malformed" => result.dropped_malformed, + "match_rate_percent" => match_rate, + "per_source" => result.per_source, + ) + open(STATS_PATH, "w") do fh + JSON3.pretty(fh, stats) + end + println("\nStats written to $(STATS_PATH)") + + if result.written == 0 + println("\nERROR: 0 records written. Check that the premise files carry 'prover' and 'theorem' fields.") + exit(2) + end + return 0 +end + +main() From 7d417cf455d03014281023597173e3863c2e143f Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 20 Apr 2026 10:13:12 +0000 Subject: [PATCH 6/7] chore(lints): clear all 4 stable + 2 verisim cargo warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit src/rust/agent/memory.rs, src/rust/vcl_ut.rs Drop `warn` from the default-feature `tracing` import — the macro is only called inside #[cfg(feature = "verisim")] blocks. Re-import it behind the same gate so verisim builds keep compiling. src/rust/vcl_ut.rs Move the `let theorem = ...` binding inside the existing #[cfg(feature = "verisim")] block at execute_find_proof — it's only used by the verisim octad-key path. Removes the unused-variable warning under the default feature without sprinkling _theorem. src/rust/provers/mod.rs Drop the `"lp"` arm from the Dedukti file-extension match (line 1359). `.lp` is already claimed by GLPK seventeen lines above, so the Dedukti branch was unreachable. Inline a comment explaining the .lp ambiguity (LP/MIP vs lambdapi) and pointing readers at detect_from_file_content() for content-aware disambiguation. src/rust/verisim_bridge.rs Drop unused `info` from the tracing import (only debug + warn are called) and unused `Tactic` from the top-level core import (only the test module references it; added there explicitly instead). Test counts unchanged: 613 default, 640 verisim, 0 failures. 
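For reference, the gating pattern the first two hunks apply (a minimal
sketch using only the imports shown in the diff; the verisim-only call
sites are elided):

    use tracing::{debug, info}; // used on every build
    #[cfg(feature = "verisim")]
    use tracing::warn; // resolved only where warn!() is actually called

Default-feature builds never compile the second import, so the
unused-import warning disappears while verisim builds still find the
macro.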
--- src/rust/agent/memory.rs | 4 +++- src/rust/provers/mod.rs | 6 +++++- src/rust/vcl_ut.rs | 7 ++++--- src/rust/verisim_bridge.rs | 6 +++--- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/rust/agent/memory.rs b/src/rust/agent/memory.rs index 75fd294..a100211 100644 --- a/src/rust/agent/memory.rs +++ b/src/rust/agent/memory.rs @@ -12,7 +12,9 @@ use async_trait::async_trait; use serde::{Deserialize, Serialize}; use std::path::PathBuf; use tokio::sync::RwLock; -use tracing::{debug, info, warn}; +use tracing::{debug, info}; +#[cfg(feature = "verisim")] +use tracing::warn; use super::AgenticGoal; use crate::core::ProofState; diff --git a/src/rust/provers/mod.rs b/src/rust/provers/mod.rs index 5c054e8..2caa11f 100644 --- a/src/rust/provers/mod.rs +++ b/src/rust/provers/mod.rs @@ -1362,7 +1362,11 @@ impl ProverFactory { // Use detect_from_file_content() for Lean 3 vs 4 disambiguation. "lean3" => Some(ProverKind::Lean3), // explicit extension "thm" => Some(ProverKind::Abella), // Abella .thm files - "dk" | "lp" => Some(ProverKind::Dedukti), // Dedukti / λΠ + // Dedukti uses .dk; .lp (lambdapi dialect) is shadowed by GLPK above + // since LP/MIP files dominate that extension in the wild — a .lp + // input ambiguous between lambdapi and linear programming is + // resolved as GLPK. Use detect_from_file_content() to disambiguate. + "dk" => Some(ProverKind::Dedukti), // Dedukti / λΠ "bpl" => Some(ProverKind::Boogie), // Boogie intermediate language "ftl" => Some(ProverKind::Naproche), // Naproche controlled-NL "ma" => Some(ProverKind::Matita), // Matita proof file diff --git a/src/rust/vcl_ut.rs b/src/rust/vcl_ut.rs index 1689afa..c648e8a 100644 --- a/src/rust/vcl_ut.rs +++ b/src/rust/vcl_ut.rs @@ -22,7 +22,9 @@ use anyhow::{Context, Result}; use serde::{Deserialize, Serialize}; -use tracing::{debug, info, warn}; +use tracing::{debug, info}; +#[cfg(feature = "verisim")] +use tracing::warn; use crate::provers::ProverKind; @@ -493,12 +495,11 @@ impl QueryExecutor { /// Find a specific proof by theorem name and optional prover. 
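+    /// `theorem` is consumed only by the verisim octad-key lookup, so its
+    /// binding lives inside the `#[cfg(feature = "verisim")]` block below.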
async fn execute_find_proof(&self, query: &ProofQuery) -> Result { - let theorem = query.theorem_name.as_deref().unwrap_or(""); - #[cfg(feature = "verisim")] if let Some(prover) = query.prover { // Generate the octad key and look it up directly if let Some(ref goal_display) = query.goal_display { + let theorem = query.theorem_name.as_deref().unwrap_or(""); let goal = Goal { id: "query".to_string(), target: crate::core::Term::Var(goal_display.clone()), diff --git a/src/rust/verisim_bridge.rs b/src/rust/verisim_bridge.rs index 973e501..687ce30 100644 --- a/src/rust/verisim_bridge.rs +++ b/src/rust/verisim_bridge.rs @@ -23,9 +23,9 @@ use anyhow::{Context, Result}; use chrono::Utc; use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use tracing::{debug, info, warn}; +use tracing::{debug, warn}; -use crate::core::{Goal, ProofState, Tactic}; +use crate::core::{Goal, ProofState}; use crate::proof_encoding; use crate::provers::ProverKind; @@ -879,7 +879,7 @@ fn base64_encode(bytes: &[u8]) -> String { #[cfg(test)] mod tests { use super::*; - use crate::core::{Context, Term}; + use crate::core::{Context, Tactic, Term}; use std::collections::HashMap; fn sample_goal() -> Goal { From d1654225889a7ef4df2ce331224a194d779dbcdd Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 20 Apr 2026 10:19:03 +0000 Subject: [PATCH 7/7] fix(bench): add missing diagnostics field in routing bench DispatchConfig dispatch.rs:101 added a `diagnostics: bool` field to DispatchConfig under #[serde(default)] but routing_benchmarks.rs:305 still constructs the config with the old field set, breaking `cargo bench --no-run`. Add `diagnostics: false` to match the field set. Production callers keep using DispatchConfig::default() so this only affects the bench. --- benches/routing_benchmarks.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/benches/routing_benchmarks.rs b/benches/routing_benchmarks.rs index 754d49e..dda05a6 100644 --- a/benches/routing_benchmarks.rs +++ b/benches/routing_benchmarks.rs @@ -308,6 +308,7 @@ fn bench_dispatch_config_construction(c: &mut Criterion) { track_axioms: true, generate_certificates: true, timeout: 60, + diagnostics: false, }) }) });
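A note on robustness: struct-update syntax would absorb future
`#[serde(default)]` field additions without further bench edits (a sketch,
not part of the patch; it assumes `DispatchConfig` implements `Default`,
which the production `DispatchConfig::default()` callers mentioned above
imply):

    let cfg = DispatchConfig {
        track_axioms: true,
        generate_certificates: true,
        timeout: 60,
        ..DispatchConfig::default() // supplies diagnostics and any later fields
    };

The patch keeps the fully literal field set instead, so
`bench_dispatch_config_construction` continues to measure an explicit
construction rather than `Default` plus overrides.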