Skip to content
1 change: 1 addition & 0 deletions benches/routing_benchmarks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ fn bench_dispatch_config_construction(c: &mut Criterion) {
track_axioms: true,
generate_certificates: true,
timeout: 60,
diagnostics: false,
})
})
});
Expand Down
29 changes: 26 additions & 3 deletions src/julia/run_training.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,14 @@
# data_dir = training_data/
# save_dir = models/neural/
#
# Environment overrides (see below):
# ECHIDNA_MAX_PROOF_STATES — cap on proof states loaded (default 200000 on
# GPU, 50000 on CPU). Set to 0 to disable the cap and consume the entire
# expanded corpus; required when re-baselining after corpus growth.
# ECHIDNA_NUM_EPOCHS — training epochs (default 30).
# ECHIDNA_NUM_NEGATIVES — hard-negative premise samples per example
# (default 20).
#
# This script:
# 1. Loads JSONL training data (proof states + premises)
# 2. Builds vocabulary from the corpus
Expand Down Expand Up @@ -74,10 +82,24 @@ println("═══════════════════════
println("Loading training data...")
println("═══════════════════════════════════════════════════════════")

# Default cap: 200k on GPU (enough to exercise the expanded corpus without
# OOM on a 24GB card), 50k on CPU (keeps wall-clock finite). An operator
# re-baselining after corpus growth sets ECHIDNA_MAX_PROOF_STATES=0 to lift
# the cap entirely.
default_cap = has_gpu ? 200_000 : 50_000
# Unset or empty env var -> hardware-dependent default; any non-empty value
# (including "0") is parsed as the explicit cap. A non-integer value will
# raise from `parse(Int, ...)` rather than being silently ignored.
cap_env = get(ENV, "ECHIDNA_MAX_PROOF_STATES", "")
max_proof_states = isempty(cap_env) ? default_cap : parse(Int, cap_env)
# `load_training_data` treats any value `<= 0` as "load everything".
cap_label = max_proof_states <= 0 ? "unlimited" : string(max_proof_states)
println(" max_proof_states = $cap_label")

# Hard-negative premise samples per training example (default 20, per the
# ECHIDNA_NUM_NEGATIVES note in the file header).
num_negatives = parse(Int, get(ENV, "ECHIDNA_NUM_NEGATIVES", "20"))
println(" num_negatives = $num_negatives")

train_data, val_data, vocab = load_training_data(data_dir;
train_split=0.8f0,
max_proof_states=50000, # Cap for reasonable training time
num_negatives=20
max_proof_states=max_proof_states,
num_negatives=num_negatives,
)

if isempty(train_data.examples)
Expand All @@ -101,8 +123,9 @@ println("Model created successfully")
println()

# Configure training
num_epochs = parse(Int, get(ENV, "ECHIDNA_NUM_EPOCHS", "30"))
training_config = TrainingConfig(
num_epochs=30,
num_epochs=num_epochs,
learning_rate=1f-4,
lr_schedule=:cosine,
weight_decay=1f-5,
Expand Down
4 changes: 3 additions & 1 deletion src/rust/agent/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;
use tokio::sync::RwLock;
use tracing::{debug, info, warn};
use tracing::{debug, info};
#[cfg(feature = "verisim")]
use tracing::warn;

use super::AgenticGoal;
use crate::core::ProofState;
Expand Down
6 changes: 3 additions & 3 deletions src/rust/proof_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ mod tests {
hypotheses: vec![],
};

let id1 = proof_identity("my_theorem", &goal, ProverKind::Lean4);
let id2 = proof_identity("my_theorem", &goal, ProverKind::Lean4);
let id1 = proof_identity("my_theorem", &goal, ProverKind::Lean);
let id2 = proof_identity("my_theorem", &goal, ProverKind::Lean);
assert_eq!(id1, id2, "Same inputs must produce same identity");
}

Expand All @@ -167,7 +167,7 @@ mod tests {
hypotheses: vec![],
};

let lean = proof_identity("thm", &goal, ProverKind::Lean4);
let lean = proof_identity("thm", &goal, ProverKind::Lean);
let coq = proof_identity("thm", &goal, ProverKind::Coq);
assert_ne!(
lean, coq,
Expand Down
10 changes: 7 additions & 3 deletions src/rust/provers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1355,9 +1355,13 @@ impl ProverFactory {
// Note: .lean is shared between Lean 3 and Lean 4; default is Lean 4.
// Use detect_from_file_content() for Lean 3 vs 4 disambiguation.
"lean3" => Some(ProverKind::Lean3), // explicit extension
"thm" => Some(ProverKind::Abella), // Abella .thm files
"dk" | "lp" => Some(ProverKind::Dedukti), // Dedukti / λΠ
"bpl" => Some(ProverKind::Boogie), // Boogie intermediate language
"thm" => Some(ProverKind::Abella), // Abella .thm files
// Dedukti uses .dk; .lp (lambdapi dialect) is shadowed by GLPK above
// since LP/MIP files dominate that extension in the wild — a .lp
// input ambiguous between lambdapi and linear programming is
// resolved as GLPK. Use detect_from_file_content() to disambiguate.
"dk" => Some(ProverKind::Dedukti), // Dedukti / λΠ
"bpl" => Some(ProverKind::Boogie), // Boogie intermediate language
"ftl" => Some(ProverKind::Naproche), // Naproche controlled-NL
"ma" => Some(ProverKind::Matita), // Matita proof file
"ard" => Some(ProverKind::Arend), // Arend cubical HoTT
Expand Down
8 changes: 4 additions & 4 deletions src/rust/vcl_ut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@

use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use tracing::{debug, info, warn};
use tracing::{debug, info};
#[cfg(feature = "verisim")]
use tracing::warn;

use crate::provers::ProverKind;

Expand Down Expand Up @@ -493,13 +495,11 @@ impl QueryExecutor {

/// Find a specific proof by theorem name and optional prover.
async fn execute_find_proof(&self, query: &ProofQuery) -> Result<QueryResult> {
type_info: None,
let theorem = query.theorem_name.as_deref().unwrap_or("");

#[cfg(feature = "verisim")]
if let Some(prover) = query.prover {
// Generate the octad key and look it up directly
if let Some(ref goal_display) = query.goal_display {
let theorem = query.theorem_name.as_deref().unwrap_or("");
let goal = Goal {
id: "query".to_string(),
target: crate::core::Term::Var(goal_display.clone()),
Expand Down
6 changes: 6 additions & 0 deletions src/rust/verification/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ pub mod confidence;
pub mod mutation;
pub mod pareto;
pub mod portfolio;
#[cfg(feature = "verisim")]
pub mod proof;
pub mod statistics;

pub use axiom_tracker::{AxiomPolicy, AxiomTracker, AxiomUsage, DangerLevel};
Expand All @@ -24,4 +26,8 @@ pub use confidence::TrustLevel;
pub use mutation::{MutationKind, MutationResult, MutationTester};
pub use pareto::{ParetoFrontier, ProofCandidate, ProofObjective};
pub use portfolio::{PortfolioConfig, PortfolioResult, PortfolioSolver};
#[cfg(feature = "verisim")]
pub use proof::{
theorem_identity, Proof, ProofStateRecord, ProofVersion, TacticApplication, TacticStatus,
};
pub use statistics::StatisticsTracker;
17 changes: 9 additions & 8 deletions src/rust/verisim_bridge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ use anyhow::{Context, Result};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tracing::{debug, info, warn};
use tracing::{debug, warn};

use crate::core::{Goal, ProofState, Tactic};
use crate::core::{Goal, ProofState};
use crate::proof_encoding;
use crate::provers::ProverKind;

Expand Down Expand Up @@ -252,7 +252,7 @@ pub struct SpatialPayload {
///
/// Usage:
/// ```ignore
/// let octad = ProofOctadBuilder::new("my_theorem", &goal, ProverKind::Lean4)
/// let octad = ProofOctadBuilder::new("my_theorem", &goal, ProverKind::Lean)
/// .with_proof_state(&proof_state)
/// .with_axioms(vec!["Classical.em".to_string()])
/// .with_aspects(vec!["logic".to_string()])
Expand Down Expand Up @@ -879,7 +879,7 @@ fn base64_encode(bytes: &[u8]) -> String {
#[cfg(test)]
mod tests {
use super::*;
use crate::core::{Context, Term};
use crate::core::{Context, Tactic, Term};
use std::collections::HashMap;

fn sample_goal() -> Goal {
Expand Down Expand Up @@ -921,7 +921,7 @@ mod tests {
let goal = sample_goal();
let proof = sample_proof_state();

let octad = ProofOctadBuilder::new("nat_add_zero", &goal, ProverKind::Lean4)
let octad = ProofOctadBuilder::new("nat_add_zero", &goal, ProverKind::Lean)
.with_proof_state(&proof)
.with_axioms(vec!["Nat.rec".to_string()])
.with_aspects(vec!["arithmetic".to_string(), "induction".to_string()])
Expand All @@ -932,9 +932,10 @@ mod tests {
// Key should be a 64-char hex digest
assert_eq!(octad.key.len(), 64);

// Semantic modality
// Semantic modality (ProverKind::Lean is the Lean 4 variant; Lean 3
// is a sibling ProverKind::Lean3.)
assert_eq!(octad.semantic.status, ProofStatus::Complete);
assert_eq!(octad.semantic.prover, "Lean4");
assert_eq!(octad.semantic.prover, "Lean");
assert!(!octad.semantic.proof_blob_b64.is_empty());
assert_eq!(octad.semantic.axioms_used, vec!["Nat.rec"]);

Expand Down Expand Up @@ -964,7 +965,7 @@ mod tests {

// Graph modality
assert_eq!(octad.graph.cross_prover_id.len(), 64);
assert!(octad.graph.prover_id.contains("Lean4"));
assert!(octad.graph.prover_id.contains("Lean"));

// Tensor modality
assert_eq!(*octad.tensor.metrics.get("time_ms").unwrap(), 42.0);
Expand Down
71 changes: 71 additions & 0 deletions tests/live_prover_suite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,15 @@ fn kind_label(kind: ProverKind) -> &'static str {
ProverKind::Dafny => "Dafny",
ProverKind::FStar => "F*",
ProverKind::TLAPS => "TLAPS",
ProverKind::Tamarin => "Tamarin",
ProverKind::ProVerif => "ProVerif",
ProverKind::Metamath => "Metamath",
ProverKind::Twelf => "Twelf",
ProverKind::ORTools => "OR-Tools",
ProverKind::HOL4 => "HOL4",
ProverKind::ACL2 => "ACL2",
ProverKind::SCIP => "SCIP",
ProverKind::Imandra => "Imandra",
_ => "<other>",
}
}
Expand Down Expand Up @@ -215,3 +224,65 @@ async fn live_tlaps_version() {
// TLA+ Proof System's prover is `tlapm` (per provers/mod.rs).
assert_version_reachable(ProverKind::TLAPS, "tlapm").await;
}

// ==========================================================================
// Tier 3 — weekly. Upstream-tarball or heavier-build provers. Most of
// these SKIP locally and in PR CI; the weekly tier3 matrix in
// live-provers.yml provisions each binary in its own job before running
// `cargo test ... <backend>`, so only the matching test runs per job.
// ==========================================================================

// Tamarin security-protocol prover: probe the `tamarin-prover` binary for a
// version response. Per the tier-3 note above, expected to SKIP locally and
// in PR CI; the weekly matrix provisions the binary.
#[tokio::test]
async fn live_tamarin_version() {
    assert_version_reachable(ProverKind::Tamarin, "tamarin-prover").await;
}

// ProVerif protocol verifier: probe the `proverif` binary for a version
// response. Expected to SKIP on runners where it is not provisioned.
#[tokio::test]
async fn live_proverif_version() {
    assert_version_reachable(ProverKind::ProVerif, "proverif").await;
}

// Metamath: probe the `metamath` CLI for a version response. Expected to
// SKIP on runners where it is not provisioned (tier-3 note above).
#[tokio::test]
async fn live_metamath_version() {
    assert_version_reachable(ProverKind::Metamath, "metamath").await;
}

// Twelf logical framework: probe its CLI for a version response.
#[tokio::test]
async fn live_twelf_version() {
    // Twelf's CLI entry is `twelf-server` per provers/mod.rs.
    assert_version_reachable(ProverKind::Twelf, "twelf-server").await;
}

// OR-Tools: probe the wrapper binary for a version response.
#[tokio::test]
async fn live_ortools_version() {
    // Echidna's ORTools backend invokes `ortools_solve` (wrapper around the
    // OR-Tools C++ solve CLI). Provisioned via upstream tarball.
    assert_version_reachable(ProverKind::ORTools, "ortools_solve").await;
}

// HOL4: probe the `hol` binary for a version response.
#[tokio::test]
async fn live_hol4_version() {
    // HOL4 requires Poly/ML + a tree build; provisioning is deferred to
    // Containerfile. Test SKIPs on runners without `hol` on PATH.
    assert_version_reachable(ProverKind::HOL4, "hol").await;
}

// ACL2: probe the `acl2` binary for a version response.
#[tokio::test]
async fn live_acl2_version() {
    // ACL2 requires a Common Lisp image; provisioning deferred to
    // Containerfile. Expected to SKIP until provisioned.
    assert_version_reachable(ProverKind::ACL2, "acl2").await;
}

// SCIP: probe the `scip` binary for a version response.
#[tokio::test]
async fn live_scip_version() {
    // SCIP requires a cmake build of SCIP Optimization Suite; deferred to
    // Containerfile. Test SKIPs until provisioned.
    assert_version_reachable(ProverKind::SCIP, "scip").await;
}

// Imandra: probe the `imandra` binary for a version response.
#[tokio::test]
async fn live_imandra_version() {
    // Imandra is proprietary; handled via vendor-supplied container where a
    // licence is available. Test SKIPs on public CI.
    assert_version_reachable(ProverKind::Imandra, "imandra").await;
}
Loading