Skip to content
1 change: 1 addition & 0 deletions benches/routing_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ fn bench_dispatch_config_construction(c: &mut Criterion) {
track_axioms: true,
generate_certificates: true,
timeout: 60,
diagnostics: false,
})
})
});
Expand Down
29 changes: 26 additions & 3 deletions src/julia/run_training.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
# data_dir = training_data/
# save_dir = models/neural/
#
# Environment overrides (see below):
# ECHIDNA_MAX_PROOF_STATES — cap on proof states loaded (default 200000 on
# GPU, 50000 on CPU). Set to 0 to disable the cap and consume the entire
# expanded corpus; required when re-baselining after corpus growth.
# ECHIDNA_NUM_EPOCHS — training epochs (default 30).
# ECHIDNA_NUM_NEGATIVES — hard-negative premise samples per example
# (default 20).
#
# This script:
# 1. Loads JSONL training data (proof states + premises)
# 2. Builds vocabulary from the corpus
Expand Down Expand Up @@ -74,10 +82,24 @@ println("═══════════════════════
println("Loading training data...")
println("═══════════════════════════════════════════════════════════")

# Default cap: 200k on GPU (enough to exercise the expanded corpus without
# OOM on a 24GB card), 50k on CPU (keeps wall-clock finite). An operator
# re-baselining after corpus growth sets ECHIDNA_MAX_PROOF_STATES=0 to lift
# the cap entirely.
default_cap = has_gpu ? 200_000 : 50_000
# Unset or empty env var -> hardware-dependent default; any non-empty value
# (including "0") is parsed as the explicit cap. A non-integer value will
# raise from `parse(Int, ...)` rather than being silently ignored.
cap_env = get(ENV, "ECHIDNA_MAX_PROOF_STATES", "")
max_proof_states = isempty(cap_env) ? default_cap : parse(Int, cap_env)
# `load_training_data` treats any value `<= 0` as "load everything".
cap_label = max_proof_states <= 0 ? "unlimited" : string(max_proof_states)
println(" max_proof_states = $cap_label")

# Hard-negative premise samples per training example (default 20, per the
# ECHIDNA_NUM_NEGATIVES note in the file header).
num_negatives = parse(Int, get(ENV, "ECHIDNA_NUM_NEGATIVES", "20"))
println(" num_negatives = $num_negatives")

train_data, val_data, vocab = load_training_data(data_dir;
train_split=0.8f0,
max_proof_states=50000, # Cap for reasonable training time
num_negatives=20
max_proof_states=max_proof_states,
num_negatives=num_negatives,
)

if isempty(train_data.examples)
Expand All @@ -101,8 +123,9 @@ println("Model created successfully")
println()

# Configure training
num_epochs = parse(Int, get(ENV, "ECHIDNA_NUM_EPOCHS", "30"))
training_config = TrainingConfig(
num_epochs=30,
num_epochs=num_epochs,
learning_rate=1f-4,
lr_schedule=:cosine,
weight_decay=1f-5,
Expand Down
4 changes: 3 additions & 1 deletion src/rust/agent/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use tracing::{debug, info};
#[cfg(feature = "verisim")]
use tracing::warn;

use super::AgenticGoal;
use crate::core::ProofState;
Expand Down
6 changes: 3 additions & 3 deletions src/rust/proof_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ mod tests {
hypotheses: vec![],
};

let id1 = proof_identity("my_theorem", &goal, ProverKind::Lean4);
let id2 = proof_identity("my_theorem", &goal, ProverKind::Lean4);
let id1 = proof_identity("my_theorem", &goal, ProverKind::Lean);
let id2 = proof_identity("my_theorem", &goal, ProverKind::Lean);
assert_eq!(id1, id2, "Same inputs must produce same identity");
}

Expand All @@ -167,7 +167,7 @@ mod tests {
hypotheses: vec![],
};

let lean = proof_identity("thm", &goal, ProverKind::Lean4);
let lean = proof_identity("thm", &goal, ProverKind::Lean);
let coq = proof_identity("thm", &goal, ProverKind::Coq);
assert_ne!(
lean, coq,
Expand Down
10 changes: 7 additions & 3 deletions src/rust/provers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,9 +1355,13 @@ impl ProverFactory {
// Note: .lean is shared between Lean 3 and Lean 4; default is Lean 4.
// Use detect_from_file_content() for Lean 3 vs 4 disambiguation.
"lean3" => Some(ProverKind::Lean3), // explicit extension
"thm" => Some(ProverKind::Abella), // Abella .thm files
"dk" | "lp" => Some(ProverKind::Dedukti), // Dedukti / λΠ
"bpl" => Some(ProverKind::Boogie), // Boogie intermediate language
"thm" => Some(ProverKind::Abella), // Abella .thm files
// Dedukti uses .dk; .lp (lambdapi dialect) is shadowed by GLPK above
// since LP/MIP files dominate that extension in the wild — a .lp
// input ambiguous between lambdapi and linear programming is
// resolved as GLPK. Use detect_from_file_content() to disambiguate.
"dk" => Some(ProverKind::Dedukti), // Dedukti / λΠ
"bpl" => Some(ProverKind::Boogie), // Boogie intermediate language
"ftl" => Some(ProverKind::Naproche), // Naproche controlled-NL
"ma" => Some(ProverKind::Matita), // Matita proof file
"ard" => Some(ProverKind::Arend), // Arend cubical HoTT
Expand Down
8 changes: 4 additions & 4 deletions src/rust/vcl_ut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@

use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};
use tracing::{debug, info};
#[cfg(feature = "verisim")]
use tracing::warn;

use crate::provers::ProverKind;

Expand Down Expand Up @@ -493,13 +495,11 @@ impl QueryExecutor {

/// Find a specific proof by theorem name and optional prover.
async fn execute_find_proof(&self, query: &ProofQuery) -> Result<QueryResult> {
type_info: None,
let theorem = query.theorem_name.as_deref().unwrap_or("");

#[cfg(feature = "verisim")]
if let Some(prover) = query.prover {
// Generate the octad key and look it up directly
if let Some(ref goal_display) = query.goal_display {
let theorem = query.theorem_name.as_deref().unwrap_or("");
let goal = Goal {
id: "query".to_string(),
target: crate::core::Term::Var(goal_display.clone()),
Expand Down
6 changes: 6 additions & 0 deletions src/rust/verification/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ pub mod confidence;
pub mod mutation;
pub mod pareto;
pub mod portfolio;
#[cfg(feature = "verisim")]
pub mod proof;
pub mod statistics;

pub use axiom_tracker::{AxiomPolicy, AxiomTracker, AxiomUsage, DangerLevel};
Expand All @@ -24,4 +26,8 @@ pub use confidence::TrustLevel;
pub use mutation::{MutationKind, MutationResult, MutationTester};
pub use pareto::{ParetoFrontier, ProofCandidate, ProofObjective};
pub use portfolio::{PortfolioConfig, PortfolioResult, PortfolioSolver};
#[cfg(feature = "verisim")]
pub use proof::{
theorem_identity, Proof, ProofStateRecord, ProofVersion, TacticApplication, TacticStatus,
};
pub use statistics::StatisticsTracker;
17 changes: 9 additions & 8 deletions src/rust/verisim_bridge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ use anyhow::{Context, Result};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tracing::{debug, info, warn};
use tracing::{debug, warn};

use crate::core::{Goal, ProofState, Tactic};
use crate::core::{Goal, ProofState};
use crate::proof_encoding;
use crate::provers::ProverKind;

Expand Down Expand Up @@ -252,7 +252,7 @@ pub struct SpatialPayload {
///
/// Usage:
/// ```ignore
/// let octad = ProofOctadBuilder::new("my_theorem", &goal, ProverKind::Lean4)
/// let octad = ProofOctadBuilder::new("my_theorem", &goal, ProverKind::Lean)
/// .with_proof_state(&proof_state)
/// .with_axioms(vec!["Classical.em".to_string()])
/// .with_aspects(vec!["logic".to_string()])
Expand Down Expand Up @@ -879,7 +879,7 @@ fn base64_encode(bytes: &[u8]) -> String {
#[cfg(test)]
mod tests {
use super::*;
use crate::core::{Context, Term};
use crate::core::{Context, Tactic, Term};
use std::collections::HashMap;

fn sample_goal() -> Goal {
Expand Down Expand Up @@ -921,7 +921,7 @@ mod tests {
let goal = sample_goal();
let proof = sample_proof_state();

let octad = ProofOctadBuilder::new("nat_add_zero", &goal, ProverKind::Lean4)
let octad = ProofOctadBuilder::new("nat_add_zero", &goal, ProverKind::Lean)
.with_proof_state(&proof)
.with_axioms(vec!["Nat.rec".to_string()])
.with_aspects(vec!["arithmetic".to_string(), "induction".to_string()])
Expand All @@ -932,9 +932,10 @@ mod tests {
// Key should be a 64-char hex digest
assert_eq!(octad.key.len(), 64);

// Semantic modality
// Semantic modality (ProverKind::Lean is the Lean 4 variant; Lean 3
// is a sibling ProverKind::Lean3.)
assert_eq!(octad.semantic.status, ProofStatus::Complete);
assert_eq!(octad.semantic.prover, "Lean4");
assert_eq!(octad.semantic.prover, "Lean");
assert!(!octad.semantic.proof_blob_b64.is_empty());
assert_eq!(octad.semantic.axioms_used, vec!["Nat.rec"]);

Expand Down Expand Up @@ -964,7 +965,7 @@ mod tests {

// Graph modality
assert_eq!(octad.graph.cross_prover_id.len(), 64);
assert!(octad.graph.prover_id.contains("Lean4"));
assert!(octad.graph.prover_id.contains("Lean"));

// Tensor modality
assert_eq!(*octad.tensor.metrics.get("time_ms").unwrap(), 42.0);
Expand Down
71 changes: 71 additions & 0 deletions tests/live_prover_suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,15 @@ fn kind_label(kind: ProverKind) -> &'static str {
ProverKind::Dafny => "Dafny",
ProverKind::FStar => "F*",
ProverKind::TLAPS => "TLAPS",
ProverKind::Tamarin => "Tamarin",
ProverKind::ProVerif => "ProVerif",
ProverKind::Metamath => "Metamath",
ProverKind::Twelf => "Twelf",
ProverKind::ORTools => "OR-Tools",
ProverKind::HOL4 => "HOL4",
ProverKind::ACL2 => "ACL2",
ProverKind::SCIP => "SCIP",
ProverKind::Imandra => "Imandra",
_ => "<other>",
}
}
Expand Down Expand Up @@ -215,3 +224,65 @@ async fn live_tlaps_version() {
// TLA+ Proof System's prover is `tlapm` (per provers/mod.rs).
assert_version_reachable(ProverKind::TLAPS, "tlapm").await;
}

// ==========================================================================
// Tier 3 — weekly. Upstream-tarball or heavier-build provers. Most of
// these SKIP locally and in PR CI; the weekly tier3 matrix in
// live-provers.yml provisions each binary in its own job before running
// `cargo test ... <backend>`, so only the matching test runs per job.
// ==========================================================================

// Tamarin security-protocol prover: probe the `tamarin-prover` binary for a
// version response. Per the tier-3 note above, expected to SKIP locally and
// in PR CI; the weekly matrix provisions the binary.
#[tokio::test]
async fn live_tamarin_version() {
    assert_version_reachable(ProverKind::Tamarin, "tamarin-prover").await;
}

// ProVerif protocol verifier: probe the `proverif` binary for a version
// response. Expected to SKIP on runners where it is not provisioned.
#[tokio::test]
async fn live_proverif_version() {
    assert_version_reachable(ProverKind::ProVerif, "proverif").await;
}

// Metamath: probe the `metamath` CLI for a version response. Expected to
// SKIP on runners where it is not provisioned (tier-3 note above).
#[tokio::test]
async fn live_metamath_version() {
    assert_version_reachable(ProverKind::Metamath, "metamath").await;
}

// Twelf logical framework: probe its CLI for a version response.
#[tokio::test]
async fn live_twelf_version() {
    // Twelf's CLI entry is `twelf-server` per provers/mod.rs.
    assert_version_reachable(ProverKind::Twelf, "twelf-server").await;
}

// OR-Tools: probe the wrapper binary for a version response.
#[tokio::test]
async fn live_ortools_version() {
    // Echidna's ORTools backend invokes `ortools_solve` (wrapper around the
    // OR-Tools C++ solve CLI). Provisioned via upstream tarball.
    assert_version_reachable(ProverKind::ORTools, "ortools_solve").await;
}

// HOL4: probe the `hol` binary for a version response.
#[tokio::test]
async fn live_hol4_version() {
    // HOL4 requires Poly/ML + a tree build; provisioning is deferred to
    // Containerfile. Test SKIPs on runners without `hol` on PATH.
    assert_version_reachable(ProverKind::HOL4, "hol").await;
}

// ACL2: probe the `acl2` binary for a version response.
#[tokio::test]
async fn live_acl2_version() {
    // ACL2 requires a Common Lisp image; provisioning deferred to
    // Containerfile. Expected to SKIP until provisioned.
    assert_version_reachable(ProverKind::ACL2, "acl2").await;
}

// SCIP: probe the `scip` binary for a version response.
#[tokio::test]
async fn live_scip_version() {
    // SCIP requires a cmake build of SCIP Optimization Suite; deferred to
    // Containerfile. Test SKIPs until provisioned.
    assert_version_reachable(ProverKind::SCIP, "scip").await;
}

// Imandra: probe the `imandra` binary for a version response.
#[tokio::test]
async fn live_imandra_version() {
    // Imandra is proprietary; handled via vendor-supplied container where a
    // licence is available. Test SKIPs on public CI.
    assert_version_reachable(ProverKind::Imandra, "imandra").await;
}
Loading