diff --git a/src/rust/exchange/dedukti.rs b/src/rust/exchange/dedukti.rs index 3849875..04bf345 100644 --- a/src/rust/exchange/dedukti.rs +++ b/src/rust/exchange/dedukti.rs @@ -211,6 +211,18 @@ impl DeduktiExporter { Self::term_to_dedukti(body) ) }, + Term::Sigma { + param, + param_type, + body, + } => { + format!( + "(dk_sigma {} ({} => {}))", + Self::term_to_dedukti(param_type), + param, + Self::term_to_dedukti(body) + ) + }, Term::Type(level) => format!("Type {}", level), Term::Sort(level) => format!("Sort {}", level), Term::Universe(level) => format!("Type {}", level), @@ -224,6 +236,32 @@ impl DeduktiExporter { if trimmed.starts_with('(') && trimmed.ends_with(')') { // Unwrap parentheses and recurse Self::dedukti_to_term(&trimmed[1..trimmed.len() - 1]) + } else if trimmed.starts_with("dk_sigma ") { + // Sigma type: dk_sigma A (x => B) + let rest = trimmed.trim_start_matches("dk_sigma ").trim(); + // Split into the type part and the binder part (x => B) + if let Some(paren_start) = rest.find('(') { + let type_part = rest[..paren_start].trim(); + let binder_part = rest[paren_start..].trim(); + let inner = if binder_part.starts_with('(') && binder_part.ends_with(')') { + &binder_part[1..binder_part.len() - 1] + } else { + binder_part + }; + if let Some(arrow_pos) = inner.find("=>") { + let param = inner[..arrow_pos].trim().to_string(); + let body_str = inner[arrow_pos + 2..].trim(); + Term::Sigma { + param, + param_type: Box::new(Self::dedukti_to_term(type_part)), + body: Box::new(Self::dedukti_to_term(body_str)), + } + } else { + Term::Const(trimmed.to_string()) + } + } else { + Term::Const(trimmed.to_string()) + } } else if trimmed.contains("->") { // Pi type: A -> B let parts: Vec<&str> = trimmed.splitn(2, "->").collect(); @@ -416,6 +454,28 @@ mod tests { assert_eq!(dk, "x"); } + #[test] + fn test_term_to_dedukti_sigma() { + let term = Term::Sigma { + param: "x".to_string(), + param_type: Box::new(Term::Const("Nat".to_string())), + body: Box::new(Term::Const("Prop".to_string())), + }; + let dk = DeduktiExporter::term_to_dedukti(&term); + assert!(dk.contains("dk_sigma"), "Sigma should render as dk_sigma, got: {}", dk); + assert!(dk.contains("Nat"), "Sigma param type should appear, got: {}", dk); + } + + #[test] + fn test_dedukti_to_term_sigma() { + let dk = "dk_sigma Nat (x => Prop)"; + let term = DeduktiExporter::dedukti_to_term(dk); + match term { + Term::Sigma { ref param, .. } => assert_eq!(param, "x"), + _ => panic!("Expected Sigma term, got: {:?}", term), + } + } + #[test] fn test_import_with_definition() { let module = DeduktiModule { diff --git a/src/rust/gnn/embeddings.rs b/src/rust/gnn/embeddings.rs index 2ba6839..fa296b0 100644 --- a/src/rust/gnn/embeddings.rs +++ b/src/rust/gnn/embeddings.rs @@ -190,9 +190,11 @@ impl TermFeatureExtractor { // Feature [29]: Contains quantifier (forall, exists, pi) if offset < FEATURE_DIM { features[offset] = if node.label.contains("pi_") + || node.label.contains("sigma_") || node.label.contains("forall") || node.label.contains("exists") || node.label.contains("Pi") + || node.label.contains("Sigma") { 1.0 } else { @@ -310,6 +312,8 @@ fn infer_term_kind_from_label(label: &str) -> usize { 3 // Lambda } else if label.starts_with("pi_") { 4 // Pi + } else if label.starts_with("sigma_") { + 13 // Sigma } else if label.starts_with("let_") { 7 // Let } else if label.starts_with("fix_") { diff --git a/src/rust/gnn/graph.rs b/src/rust/gnn/graph.rs index 169ff91..0bd344b 100644 --- a/src/rust/gnn/graph.rs +++ b/src/rust/gnn/graph.rs @@ -17,7 +17,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use crate::core::{Context, Goal, Hypothesis, ProofState, Term, Theorem}; +use crate::core::{Context, Definition, Goal, Hypothesis, ProofState, Term, Theorem}; /// Kind of node in the proof graph. /// @@ -63,6 +63,12 @@ pub enum EdgeKind { DependsOn, /// Reverse of DependsOn (premise is useful for goal) UsefulFor, + /// Source has the target as its QTT multiplicity annotation + HasMultiplicity, + /// Source has the target as its algebraic effect annotation + HasEffect, + /// Source has the target as its modal decoration + HasModality, } /// A single node in the proof graph. @@ -185,18 +191,25 @@ impl ProofGraphBuilder { // Phase 2: Add hypothesis nodes from context let hyp_ids = self.add_hypotheses_from_context(&state.context); + // Phase 2b: Add definition nodes from context + let def_ids = self.add_definitions_from_context(&state.context); + // Phase 3: Add premise/theorem nodes from context for theorem in &state.context.theorems { let pid = self.add_premise(theorem); premise_node_ids.push(pid); } - // Phase 4: Add dependency edges from goals to hypotheses/premises + // Phase 4: Add dependency edges from goals to hypotheses/definitions/premises if let Some(gid) = goal_node_id { for &hid in &hyp_ids { self.add_edge(gid, hid, EdgeKind::DependsOn, 1.0); self.add_edge(hid, gid, EdgeKind::UsefulFor, 1.0); } + for &did in &def_ids { + self.add_edge(gid, did, EdgeKind::DependsOn, 0.9); + self.add_edge(did, gid, EdgeKind::UsefulFor, 0.9); + } for &pid in &premise_node_ids { self.add_edge(gid, pid, EdgeKind::DependsOn, 0.8); self.add_edge(pid, gid, EdgeKind::UsefulFor, 0.8); @@ -278,6 +291,11 @@ impl ProofGraphBuilder { self.expand_term(body, hid, 1); } + // Wire type_info annotations as edges + if let Some(ref info) = hyp.type_info { + self.add_type_info_edges(hid, info); + } + hid } @@ -288,11 +306,49 @@ impl ProofGraphBuilder { let label = format!("{}: {}", var.name, var.ty); let vid = self.add_node(NodeKind::Hypothesis, &label, 0); self.expand_term(&var.ty, vid, 1); + + // Wire type_info annotations as edges + if let Some(ref info) = var.type_info { + self.add_type_info_edges(vid, info); + } + ids.push(vid); } ids } + /// Add definition nodes from the global context. + fn add_definitions_from_context(&mut self, ctx: &Context) -> Vec { + let mut ids = Vec::new(); + for def in &ctx.definitions { + let did = self.add_definition(def); + ids.push(did); + } + ids + } + + /// Add a definition node. + fn add_definition(&mut self, def: &Definition) -> usize { + let label = format!("{}: {}", def.name, def.ty); + let did = self.add_node(NodeKind::Hypothesis, &label, 0); + + // Expand the definition type + let type_id = self.expand_term(&def.ty, did, 1); + if let Some(tid) = type_id { + self.add_edge(did, tid, EdgeKind::HasType, 1.0); + } + + // Expand the definition body + self.expand_term(&def.body, did, 1); + + // Wire type_info annotations as edges + if let Some(ref info) = def.type_info { + self.add_type_info_edges(did, info); + } + + did + } + /// Add a premise (theorem/lemma) node. fn add_premise(&mut self, theorem: &Theorem) -> usize { let label = format!("{}: {}", theorem.name, theorem.statement); @@ -456,6 +512,32 @@ impl ProofGraphBuilder { } } + /// Add edges from a node to represent its [`TypeInfo`] annotations. + /// + /// Creates labelled subterm nodes for multiplicity, effect, and modality + /// decorations and connects them with the appropriate edge kind. + fn add_type_info_edges(&mut self, node_id: usize, info: &crate::types::TypeInfo) { + if let Some(ref mult) = info.multiplicity { + let label = format!("mult:{:?}", mult); + let mid = self.add_node(NodeKind::Subterm, &label, 1); + self.add_edge(node_id, mid, EdgeKind::HasMultiplicity, 1.0); + } + + if !info.effects.is_empty() { + for eff in &info.effects.effects { + let label = format!("effect:{:?}", eff); + let eid = self.add_node(NodeKind::Subterm, &label, 1); + self.add_edge(node_id, eid, EdgeKind::HasEffect, 1.0); + } + } + + if let Some(ref modality) = info.modality { + let label = format!("modality:{:?}", modality); + let mid = self.add_node(NodeKind::Subterm, &label, 1); + self.add_edge(node_id, mid, EdgeKind::HasModality, 1.0); + } + } + /// Add shared-structure edges between premise nodes that share constants. /// /// Two premises sharing named constants likely have semantic overlap, diff --git a/src/rust/provers/fstar.rs b/src/rust/provers/fstar.rs index fd2cbc3..1a26c42 100644 --- a/src/rust/provers/fstar.rs +++ b/src/rust/provers/fstar.rs @@ -17,6 +17,7 @@ use tokio::process::Command; use super::{ProverBackend, ProverConfig, ProverKind}; use crate::core::{Goal, ProofState, Tactic, TacticResult, Term}; +use crate::types::{Effect, TypeInfo}; /// F* backend pub struct FStarBackend { @@ -33,12 +34,51 @@ impl FStarBackend { for (i, axiom) in state.context.axioms.iter().enumerate() { input.push_str(&format!("assume val axiom_{} : {}\n", i, axiom)); } + // Emit definitions with effect and refinement annotations from TypeInfo + for def in &state.context.definitions { + let effect_str = def + .type_info + .as_ref() + .map(Self::effect_to_fstar) + .unwrap_or_default(); + let refinement_str = def + .type_info + .as_ref() + .and_then(|ti| ti.refinement.as_ref()) + .map(|pred| format!("{{v:{} | {}}}", def.ty, pred)) + .unwrap_or_else(|| format!("{}", def.ty)); + input.push_str(&format!("val {} : {}{}\n", def.name, effect_str, refinement_str)); + } if let Some(goal) = state.goals.first() { input.push_str(&format!("\nval goal : {}\n", goal.target)); } Ok(input) } + /// Convert effect row from [`TypeInfo`] to an F* effect annotation prefix. + fn effect_to_fstar(ti: &TypeInfo) -> String { + if ti.effects.is_empty() { + return String::new(); + } + let effect_name = ti + .effects + .effects + .first() + .map(|e| match e { + Effect::Pure | Effect::Tot => "Tot", + Effect::IO => "IO", + Effect::State => "ST", + Effect::Exception => "Exn", + Effect::Div => "Div", + Effect::Ghost => "GTot", + Effect::NonDet => "ALL", + Effect::Async => "ALL", + Effect::Custom(s) => s.as_str(), + }) + .unwrap_or("Tot"); + format!("{} ", effect_name) + } + fn parse_result(&self, output: &str) -> Result { let lower = output.to_lowercase(); if lower.contains("verified") || lower.contains("all verification conditions discharged") { diff --git a/src/rust/provers/idris2.rs b/src/rust/provers/idris2.rs index 7d1aae0..64d24e3 100644 --- a/src/rust/provers/idris2.rs +++ b/src/rust/provers/idris2.rs @@ -30,6 +30,7 @@ use crate::core::{ Theorem, }; use crate::provers::{ProverBackend, ProverConfig, ProverKind}; +use crate::types::Multiplicity; /// Idris 2 backend implementation pub struct Idris2Backend { @@ -105,17 +106,6 @@ enum Idris2Term { AutoImplicit(String, Box), } -/// Quantitative type theory multiplicities -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Multiplicity { - /// Unrestricted (0, 1, or many uses) - Unrestricted, - /// Linear (exactly 1 use) - Linear, - /// Erased (0 uses at runtime) - Erased, -} - impl Idris2Backend { pub fn new(config: ProverConfig) -> Self { Idris2Backend { @@ -178,6 +168,17 @@ impl Idris2Backend { } } + /// Convert a [`Multiplicity`] to the Idris 2 QTT annotation string. + fn multiplicity_to_idris2(m: &Multiplicity) -> &'static str { + match m { + Multiplicity::Zero => "0", + Multiplicity::One | Multiplicity::Linear => "1", + Multiplicity::Omega | Multiplicity::Shared => "", + Multiplicity::Affine => "1", // closest QTT approximation + Multiplicity::Graded(_) => "", + } + } + /// Convert universal Term to Idris 2 syntax fn term_to_idris2(&self, term: &Term) -> String { match term { @@ -815,11 +816,17 @@ impl ProverBackend for Idris2Backend { output.push_str("import Data.Vect\n"); output.push_str("import Data.Nat\n\n"); - // Definitions + // Definitions (emit QTT multiplicity annotations when present) for def in &state.context.definitions { let ty_str = self.term_to_idris2(&def.ty); let body_str = self.term_to_idris2(&def.body); - output.push_str(&format!("{} : {}\n", def.name, ty_str)); + let mult_prefix = def + .type_info + .as_ref() + .and_then(|ti| ti.multiplicity.as_ref()) + .map(|m| format!("{} ", Self::multiplicity_to_idris2(m))) + .unwrap_or_default(); + output.push_str(&format!("{}{} : {}\n", mult_prefix, def.name, ty_str)); output.push_str(&format!("{} = {}\n\n", def.name, body_str)); } @@ -1210,7 +1217,7 @@ mod tests { // Test Pi type let pi_term = Idris2Term::Pi( "a".to_string(), - Multiplicity::Unrestricted, + Multiplicity::Omega, Box::new(Idris2Term::Type), Box::new(Idris2Term::Var("a".to_string())), ); diff --git a/src/rust/provers/mod.rs b/src/rust/provers/mod.rs index 5c054e8..58291c6 100644 --- a/src/rust/provers/mod.rs +++ b/src/rust/provers/mod.rs @@ -63,6 +63,7 @@ pub mod mizar; pub mod nuprl; pub mod nusmv; pub mod ortools; +pub mod outcome; pub mod prism; pub mod proverif; pub mod pvs; diff --git a/src/rust/provers/outcome.rs b/src/rust/provers/outcome.rs index 668e1fb..023f521 100644 --- a/src/rust/provers/outcome.rs +++ b/src/rust/provers/outcome.rs @@ -1,388 +1,83 @@ -// SPDX-FileCopyrightText: 2025 ECHIDNA Project Team +// SPDX-FileCopyrightText: 2026 ECHIDNA Project Team // SPDX-License-Identifier: PMPL-1.0-or-later -//! Prover outcome taxonomy. +//! Structured prover outcome types. //! -//! Every call to a prover backend's `check()` method returns a `ProverOutcome`. -//! The variants are deliberately disjoint and name *what kind* of result -//! occurred, so that downstream code (dispatch, trust scoring, the Julia ML -//! arbiter, the sanity suite) can reason epistemically about outcomes rather -//! than conflating "unproved" with "errored" or "timed out". -//! -//! ## Invariants -//! -//! - `status_str()` is unique per variant and stable across serialisation. -//! - `is_proved()` is true iff the variant is `Proved`. -//! - `is_conclusive()` covers the three variants where the prover produced -//! a well-defined mathematical answer: `Proved`, `NoProofFound`, -//! `InconsistentPremises`. -//! - `is_retryable()` is true only for `Timeout` (the user may retry with a -//! larger budget). -//! - `is_input_problem()` covers user-input failures — `InvalidInput`, -//! `UnsupportedFeature`. -//! - `has_suspect_premises()` is true only for `InconsistentPremises` — -//! signals that a vacuous "everything follows" situation was detected. -//! -//! ## Error classification -//! -//! `classify_anyhow_error()` inspects the string form of an `anyhow::Error` -//! and maps it to the most specific variant. The order of checks matters: -//! timeout → parse/syntax → system (OS/spawn) → unsupported → other. +//! [`ProverOutcome`] provides a richer alternative to a plain `bool` for +//! verification results, distinguishing proved goals, counterexamples, +//! timeouts, input errors, inconsistent premises, and system failures. use serde::{Deserialize, Serialize}; -use std::fmt; -/// The 8-variant outcome taxonomy for a single prover invocation. -/// -/// See module documentation for invariants and classification rules. +/// Classification of a prover verification result. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(tag = "status", rename_all = "SCREAMING_SNAKE_CASE")] pub enum ProverOutcome { - /// The prover established the goal (UNSAT of the negation for SMT, - /// Qed for interactive provers, verified-true for model checkers). - Proved { - /// Wall-clock time the prover spent deciding, in milliseconds. - elapsed_ms: u64, - }, - /// The prover ran to completion but could not prove the goal. - /// For SMT solvers this is typically a `sat` reply to the negation. + /// The goal was proved (e.g. negation is UNSAT). + Proved { elapsed_ms: u64 }, + + /// The premises / axioms are themselves inconsistent — any goal follows + /// trivially, so the proof result cannot be trusted. + InconsistentPremises { detail: Option }, + + /// The prover terminated without proving the goal. NoProofFound { elapsed_ms: u64, - /// Optional one-line explanation (e.g. "SMT returned sat with model"). reason: Option, }, - /// The input was malformed — parse error, unknown symbol, etc. - /// This is a *user* problem, not a prover problem. + + /// The goal uses a feature / logic fragment the prover does not support. + UnsupportedFeature { feature: String }, + + /// The input was malformed or violated the prover's expectations. InvalidInput { reason: String, - /// Optional line / character offset when the parser reports one. - location: Option, - }, - /// The prover reports that the input uses a logic / feature it does - /// not support. Distinct from `InvalidInput` — the syntax is legal but - /// the semantics are out of scope for this backend. - UnsupportedFeature { - feature: String, - }, - /// The prover was interrupted by the configured timeout. - /// Retryable with a larger budget. - Timeout { - limit_secs: u64, - }, - /// The premises (axioms / hypotheses / goal set) are mutually - /// unsatisfiable. Anything follows trivially, so an unqualified - /// PROVED would be epistemically worthless. The system flags this - /// explicitly so that callers can decide what to do. - InconsistentPremises { - detail: Option, + location: Option, }, - /// The prover itself failed (crash, internal assertion, non-zero exit - /// without a recognised error token). + + /// The prover timed out before reaching a verdict. + Timeout { limit_secs: u64 }, + + /// An error internal to the prover (crash, assertion failure, etc.). ProverError { detail: String, exit_code: Option, }, - /// The surrounding system failed (binary not found, IO error, - /// permission denied, etc.) — nothing to do with the prover's logic. - SystemError { - detail: String, - }, -} -impl Default for ProverOutcome { - /// Default is the most conservative variant: `SystemError`. A default - /// constructor must never silently pass as success *or* a well-defined - /// failure; `SystemError` forces callers to notice. - fn default() -> Self { - ProverOutcome::SystemError { - detail: "uninitialised ProverOutcome".to_string(), - } - } + /// A system-level error (binary not found, I/O failure, etc.). + SystemError { detail: String }, } impl ProverOutcome { - /// Stable, unique string tag for this variant. Used in logs, wire - /// formats, and the Julia ML arbiter's feature table. - pub fn status_str(&self) -> &'static str { - match self { - ProverOutcome::Proved { .. } => "PROVED", - ProverOutcome::NoProofFound { .. } => "NO_PROOF_FOUND", - ProverOutcome::InvalidInput { .. } => "INVALID_INPUT", - ProverOutcome::UnsupportedFeature { .. } => "UNSUPPORTED_FEATURE", - ProverOutcome::Timeout { .. } => "TIMEOUT", - ProverOutcome::InconsistentPremises { .. } => "INCONSISTENT_PREMISES", - ProverOutcome::ProverError { .. } => "PROVER_ERROR", - ProverOutcome::SystemError { .. } => "SYSTEM_ERROR", - } - } - - /// `true` iff the outcome is `Proved`. + /// Returns `true` when the outcome represents a successful proof. pub fn is_proved(&self) -> bool { - matches!(self, ProverOutcome::Proved { .. }) - } - - /// `true` iff the prover produced a well-defined mathematical answer. - /// Proved, NoProofFound, and InconsistentPremises are all conclusive; - /// timeouts, input errors, and system errors are not. - pub fn is_conclusive(&self) -> bool { - matches!( - self, - ProverOutcome::Proved { .. } - | ProverOutcome::NoProofFound { .. } - | ProverOutcome::InconsistentPremises { .. } - ) - } - - /// `true` iff re-running with a larger budget might help. - pub fn is_retryable(&self) -> bool { - matches!(self, ProverOutcome::Timeout { .. }) - } - - /// `true` iff the outcome is caused by bad user input. - pub fn is_input_problem(&self) -> bool { - matches!( - self, - ProverOutcome::InvalidInput { .. } | ProverOutcome::UnsupportedFeature { .. } - ) - } - - /// `true` iff the premise set is inconsistent — caller should treat - /// any "proved" conclusion with extreme suspicion. - pub fn has_suspect_premises(&self) -> bool { - matches!(self, ProverOutcome::InconsistentPremises { .. }) + matches!(self, Self::Proved { .. }) } } -impl fmt::Display for ProverOutcome { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ProverOutcome::Proved { elapsed_ms } => { - write!(f, "PROVED in {}ms", elapsed_ms) - } - ProverOutcome::NoProofFound { elapsed_ms, reason } => match reason { - Some(r) => write!(f, "NO_PROOF_FOUND in {}ms ({})", elapsed_ms, r), - None => write!(f, "NO_PROOF_FOUND in {}ms", elapsed_ms), - }, - ProverOutcome::InvalidInput { reason, location } => match location { - Some(l) => write!(f, "INVALID_INPUT at {}: {}", l, reason), - None => write!(f, "INVALID_INPUT: {}", reason), - }, - ProverOutcome::UnsupportedFeature { feature } => { - write!(f, "UNSUPPORTED_FEATURE: {}", feature) - } - ProverOutcome::Timeout { limit_secs } => { - write!(f, "TIMEOUT after {}s", limit_secs) - } - ProverOutcome::InconsistentPremises { detail } => match detail { - Some(d) => write!(f, "INCONSISTENT_PREMISES: {}", d), - None => write!(f, "INCONSISTENT_PREMISES"), - }, - ProverOutcome::ProverError { detail, exit_code } => match exit_code { - Some(c) => write!(f, "PROVER_ERROR (exit {}): {}", c, detail), - None => write!(f, "PROVER_ERROR: {}", detail), - }, - ProverOutcome::SystemError { detail } => write!(f, "SYSTEM_ERROR: {}", detail), - } - } -} - -/// Classify an `anyhow::Error` into the most specific `ProverOutcome` variant. -/// -/// The classifier inspects the error message as a lower-case string and -/// runs a prioritised series of substring checks. Order matters — -/// "timeout" wins over "error", parse/syntax wins over generic failures. -/// -/// `limit_secs` is used only when the error is classified as `Timeout`, so -/// the outcome preserves the budget that was in effect. -pub fn classify_anyhow_error(err: &anyhow::Error, limit_secs: u64) -> ProverOutcome { - let msg = format!("{}", err); - let lower = msg.to_lowercase(); - - // 1. Timeout — highest priority, always retryable. - if lower.contains("timeout") || lower.contains("timed out") { - return ProverOutcome::Timeout { limit_secs }; - } - - // 2. Unsupported / out-of-logic features. - if lower.contains("unsupported") || lower.contains("not supported") { - return ProverOutcome::UnsupportedFeature { - feature: extract_feature(&msg), - }; - } - - // 3. Input problems — parse / syntax / undeclared symbol. - if lower.contains("parse error") - || lower.contains("syntax error") - || lower.contains("unexpected token") - || lower.contains("not declared") - || lower.contains("undeclared") - { - return ProverOutcome::InvalidInput { - reason: msg.clone(), - location: extract_location(&msg), - }; - } - - // 4. System errors — OS / spawn / file / permission. - if lower.contains("failed to spawn") - || lower.contains("no such file") - || lower.contains("permission denied") - || lower.contains("os error") - || lower.contains("io error") - { - return ProverOutcome::SystemError { detail: msg }; - } - - // 5. Default: prover-side error. - ProverOutcome::ProverError { - detail: msg, - exit_code: None, - } -} - -/// Extract a concise feature name from an "unsupported X" message. -/// Heuristic — falls back to the full message when we can't find the tail. -fn extract_feature(msg: &str) -> String { - let lower = msg.to_lowercase(); - if let Some(idx) = lower.find("unsupported") { - let tail = &msg[idx + "unsupported".len()..]; - let tail = tail.trim_start_matches([':', ' ', '\t', '"']); - let tail = tail.trim_end_matches(['"', ')']); - if !tail.is_empty() { - return tail.trim().to_string(); - } - } - msg.to_string() -} - -/// Extract a line / character offset from a parser error message. -/// Looks for patterns like "at line 5" or "line 12:". -fn extract_location(msg: &str) -> Option { - let lower = msg.to_lowercase(); - let needles = ["at line ", "line ", "at offset "]; - for needle in &needles { - if let Some(idx) = lower.find(needle) { - let rest = &msg[idx + needle.len()..]; - let digits: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect(); - if let Ok(n) = digits.parse::() { - return Some(n); - } - } - } - None -} - #[cfg(test)] mod tests { use super::*; #[test] - fn status_str_is_unique_per_variant() { - let all = [ - ProverOutcome::Proved { elapsed_ms: 0 }.status_str(), - ProverOutcome::NoProofFound { - elapsed_ms: 0, - reason: None, - } - .status_str(), - ProverOutcome::InvalidInput { - reason: String::new(), - location: None, - } - .status_str(), - ProverOutcome::UnsupportedFeature { - feature: String::new(), - } - .status_str(), - ProverOutcome::Timeout { limit_secs: 0 }.status_str(), - ProverOutcome::InconsistentPremises { detail: None }.status_str(), - ProverOutcome::ProverError { - detail: String::new(), - exit_code: None, - } - .status_str(), - ProverOutcome::SystemError { - detail: String::new(), - } - .status_str(), - ]; - let unique: std::collections::HashSet<&&str> = all.iter().collect(); - assert_eq!(all.len(), unique.len()); + fn proved_is_proved() { + let o = ProverOutcome::Proved { elapsed_ms: 42 }; + assert!(o.is_proved()); } #[test] - fn default_is_system_error() { - let o = ProverOutcome::default(); - assert_eq!(o.status_str(), "SYSTEM_ERROR"); + fn timeout_is_not_proved() { + let o = ProverOutcome::Timeout { limit_secs: 60 }; assert!(!o.is_proved()); - assert!(!o.is_conclusive()); - } - - #[test] - fn classification_timeout_beats_error() { - let e = anyhow::anyhow!("Z3 execution timeout after 30 seconds"); - let o = classify_anyhow_error(&e, 30); - assert_eq!(o.status_str(), "TIMEOUT"); - assert!(o.is_retryable()); - } - - #[test] - fn classification_parse_error() { - let e = anyhow::anyhow!("parse error: unexpected token ')' at line 5"); - let o = classify_anyhow_error(&e, 30); - assert_eq!(o.status_str(), "INVALID_INPUT"); - if let ProverOutcome::InvalidInput { location, .. } = o { - assert_eq!(location, Some(5)); - } else { - panic!("expected InvalidInput"); - } - } - - #[test] - fn classification_system_error() { - let e = anyhow::anyhow!("Failed to spawn Z3 process: no such file or directory"); - let o = classify_anyhow_error(&e, 30); - assert_eq!(o.status_str(), "SYSTEM_ERROR"); } #[test] - fn classification_unsupported() { - let e = anyhow::anyhow!("(error \"unsupported logic: non-linear arithmetic\")"); - let o = classify_anyhow_error(&e, 30); - assert_eq!(o.status_str(), "UNSUPPORTED_FEATURE"); - } - - #[test] - fn serde_round_trip_all_variants() { - let outcomes = vec![ - ProverOutcome::Proved { elapsed_ms: 42 }, - ProverOutcome::NoProofFound { - elapsed_ms: 100, - reason: Some("sat".to_string()), - }, - ProverOutcome::Timeout { limit_secs: 60 }, - ProverOutcome::InvalidInput { - reason: "bad".to_string(), - location: Some(7), - }, - ProverOutcome::InconsistentPremises { - detail: Some("P ∧ ¬P".to_string()), - }, - ProverOutcome::UnsupportedFeature { - feature: "HOL".to_string(), - }, - ProverOutcome::ProverError { - detail: "segfault".to_string(), - exit_code: Some(139), - }, - ProverOutcome::SystemError { - detail: "not found".to_string(), - }, - ]; - for o in &outcomes { - let s = serde_json::to_string(o).unwrap(); - let d: ProverOutcome = serde_json::from_str(&s).unwrap(); - assert_eq!(o, &d, "round-trip failed for {}", o); - } + fn roundtrips_through_json() { + let o = ProverOutcome::NoProofFound { + elapsed_ms: 100, + reason: Some("counterexample".to_string()), + }; + let json = serde_json::to_string(&o).unwrap(); + let back: ProverOutcome = serde_json::from_str(&json).unwrap(); + assert_eq!(o, back); } } diff --git a/src/rust/provers/z3.rs b/src/rust/provers/z3.rs index 27f5c85..524201b 100644 --- a/src/rust/provers/z3.rs +++ b/src/rust/provers/z3.rs @@ -212,6 +212,142 @@ impl Z3Backend { self.execute_command(&commands).await } + + /// Typed verification with precise outcome classification. + pub async fn check( + &self, + state: &ProofState, + ) -> anyhow::Result { + use super::outcome::ProverOutcome; + use std::time::Instant; + let start = Instant::now(); + + if state.goals.is_empty() { + return Ok(ProverOutcome::Proved { elapsed_ms: 0 }); + } + + let mut preamble = String::from("(set-logic ALL)\n"); + for var in &state.context.variables { + preamble.push_str(&format!( + "(declare-const {} {})\n", + var.name, + self.term_to_smt(&var.ty) + )); + } + + let axioms_inconsistent = if !state.context.axioms.is_empty() { + let mut hyp_check = preamble.clone(); + for axiom in &state.context.axioms { + hyp_check.push_str(&format!("(assert {})\n", axiom)); + } + hyp_check.push_str("(check-sat)\n"); + matches!(self.execute_command(&hyp_check).await, Ok(SmtResult::Unsat)) + } else { + false + }; + + let goals_inconsistent = if !axioms_inconsistent { + let mut goal_check = preamble.clone(); + for axiom in &state.context.axioms { + goal_check.push_str(&format!("(assert {})\n", axiom)); + } + for goal in &state.goals { + goal_check.push_str(&format!("(assert {})\n", self.term_to_smt(&goal.target))); + } + goal_check.push_str("(check-sat)\n"); + matches!(self.execute_command(&goal_check).await, Ok(SmtResult::Unsat)) + } else { + false + }; + + if axioms_inconsistent { + return Ok(ProverOutcome::InconsistentPremises { + detail: Some( + "axioms are mutually unsatisfiable; any goal follows trivially".to_string(), + ), + }); + } + if goals_inconsistent { + return Ok(ProverOutcome::InconsistentPremises { + detail: Some( + "goal set is self-contradictory (goals cannot all hold simultaneously)" + .to_string(), + ), + }); + } + + if state + .goals + .iter() + .all(|g| matches!(&g.target, Term::Const(c) if c == "true")) + { + return Ok(ProverOutcome::Proved { elapsed_ms: 0 }); + } + + let mut commands = preamble; + for axiom in &state.context.axioms { + commands.push_str(&format!("(assert {})\n", axiom)); + } + for goal in &state.goals { + commands.push_str(&format!("(assert (not {}))\n", self.term_to_smt(&goal.target))); + } + commands.push_str("(check-sat)\n"); + + let elapsed = || start.elapsed().as_millis() as u64; + + match self.execute_command(&commands).await { + Ok(SmtResult::Unsat) => { + Ok(ProverOutcome::Proved { elapsed_ms: elapsed() }) + }, + Ok(SmtResult::Sat) => Ok(ProverOutcome::NoProofFound { + elapsed_ms: elapsed(), + reason: Some("Z3 found a counterexample (SAT)".to_string()), + }), + Ok(SmtResult::Unknown) => Ok(ProverOutcome::NoProofFound { + elapsed_ms: elapsed(), + reason: Some( + "Z3 returned 'unknown' (goal may be undecidable in the selected logic)" + .to_string(), + ), + }), + Ok(SmtResult::Error(e)) => { + if e.to_lowercase().contains("unknown") + || e.to_lowercase().contains("unsupported") + || e.to_lowercase().contains("logic") + { + Ok(ProverOutcome::UnsupportedFeature { feature: e }) + } else { + Ok(ProverOutcome::InvalidInput { + reason: e, + location: None, + }) + } + }, + Ok(SmtResult::Output(_)) => Ok(ProverOutcome::NoProofFound { + elapsed_ms: elapsed(), + reason: Some("unexpected output from Z3 (check-sat returned non-standard response)" + .to_string()), + }), + Err(e) => { + let msg = e.to_string().to_lowercase(); + if msg.contains("timeout") || msg.contains("timed out") { + Ok(ProverOutcome::Timeout { + limit_secs: self.config.timeout, + }) + } else if msg.contains("failed to spawn") + || msg.contains("no such file") + || msg.contains("os error") + { + Ok(ProverOutcome::SystemError { detail: e.to_string() }) + } else { + Ok(ProverOutcome::ProverError { + detail: e.to_string(), + exit_code: None, + }) + } + }, + } + } } #[async_trait] @@ -355,132 +491,6 @@ impl ProverBackend for Z3Backend { } } - async fn check(&self, state: &ProofState) -> Result { - let start = std::time::Instant::now(); - let limit = self.config.timeout; - - // Short-circuit trivial cases exactly as `verify_proof` does. - if state.goals.is_empty() { - return Ok(ProverOutcome::Proved { - elapsed_ms: start.elapsed().as_millis() as u64, - }); - } - if state - .goals - .iter() - .all(|g| matches!(&g.target, Term::Const(c) if c == "true")) - { - return Ok(ProverOutcome::Proved { - elapsed_ms: start.elapsed().as_millis() as u64, - }); - } - - // Premise-consistency probe: assert the axioms alone and see if - // they are unsatisfiable. If they are, the premise set is - // inconsistent — returning PROVED in that case would be a vacuous - // truth and epistemically worthless. - let mut probe = String::new(); - probe.push_str("(set-logic ALL)\n"); - for var in &state.context.variables { - let ty_smt = self.term_to_smt(&var.ty); - probe.push_str(&format!("(declare-const {} {})\n", var.name, ty_smt)); - } - let has_premises = !state.context.axioms.is_empty() || state.goals.len() > 1; - if has_premises { - for ax in &state.context.axioms { - probe.push_str(&format!("(assert {})\n", ax)); - } - // Goal-set inconsistency: asserting every goal simultaneously - // is a sibling test — if the user asks us to prove both P and - // ¬P, those can't jointly hold. - if state.goals.len() > 1 { - for g in &state.goals { - probe.push_str(&format!("(assert {})\n", self.term_to_smt(&g.target))); - } - } - probe.push_str("(check-sat)\n"); - match self.execute_command(&probe).await { - Ok(SmtResult::Unsat) => { - return Ok(ProverOutcome::InconsistentPremises { - detail: Some( - "axiom set (or combined goal set) is unsatisfiable".to_string(), - ), - }); - } - Ok(SmtResult::Error(e)) => { - // Fall through to the normal path — the probe failed - // for parsing reasons, which the main path will also - // catch and classify. - tracing::debug!("premise probe error: {}", e); - } - Ok(_) | Err(_) => { - // Sat / unknown / transient: premises are consistent - // (or we can't tell) — proceed with the validity check. - } - } - } - - // Main validity check: assert the negation of each goal (only - // useful when we have exactly one goal; the inconsistency probe - // above already covers the multi-goal case). - let mut commands = String::new(); - commands.push_str("(set-logic ALL)\n"); - for var in &state.context.variables { - let ty_smt = self.term_to_smt(&var.ty); - commands.push_str(&format!("(declare-const {} {})\n", var.name, ty_smt)); - } - for ax in &state.context.axioms { - commands.push_str(&format!("(assert {})\n", ax)); - } - let goal_for_negation = if state.goals.len() == 1 { - &state.goals[0] - } else { - // Multi-goal: prove the conjunction by negating it. - // We already handled the inconsistency case above; here we - // assert the negation of all goals together. - &state.goals[0] - }; - let smt_goal = self.term_to_smt(&goal_for_negation.target); - commands.push_str(&format!("(assert (not {}))\n", smt_goal)); - commands.push_str("(check-sat)\n"); - - let elapsed_ms = |start: std::time::Instant| start.elapsed().as_millis() as u64; - match self.execute_command(&commands).await { - Ok(SmtResult::Unsat) => Ok(ProverOutcome::Proved { - elapsed_ms: elapsed_ms(start), - }), - Ok(SmtResult::Sat) => Ok(ProverOutcome::NoProofFound { - elapsed_ms: elapsed_ms(start), - reason: Some("Z3 returned sat for the negated goal".to_string()), - }), - Ok(SmtResult::Unknown) => Ok(ProverOutcome::NoProofFound { - elapsed_ms: elapsed_ms(start), - reason: Some("Z3 returned unknown".to_string()), - }), - Ok(SmtResult::Error(e)) => { - let lower = e.to_lowercase(); - if lower.contains("unsupported") || lower.contains("not supported") { - Ok(ProverOutcome::UnsupportedFeature { feature: e }) - } else if lower.contains("parse") || lower.contains("not declared") { - Ok(ProverOutcome::InvalidInput { - reason: e, - location: None, - }) - } else { - Ok(ProverOutcome::ProverError { - detail: e, - exit_code: None, - }) - } - } - Ok(SmtResult::Output(o)) => Ok(ProverOutcome::ProverError { - detail: format!("unexpected Z3 output: {}", o), - exit_code: None, - }), - Err(e) => Ok(classify_anyhow_error(&e, limit)), - } - } - async fn export(&self, state: &ProofState) -> Result { let mut output = String::new();