diff --git a/.containerization/Containerfile.mcp b/.containerization/Containerfile.mcp new file mode 100644 index 0000000..da2b3ed --- /dev/null +++ b/.containerization/Containerfile.mcp @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: PMPL-1.0-or-later +# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell +# +# ECHIDNA MCP Container — exposes ECHIDNA as a Model Context Protocol server +# over stdio so any MCP-capable AI agent (Claude Code, Claude Desktop, Claude +# API with tool use, other clients) can call `prove({file, prover, timeout_secs})` +# and get a real solver verdict. +# +# Bundled MVP prover set: Z3, CVC5, Coq, Agda, Metamath, SPASS (all apt), plus +# Lean 4 (via elan), Isabelle, F*, and Mizar where upstream distributions +# cooperate with an unprivileged build environment. +# +# Build: podman build -f .containerization/Containerfile.mcp -t echidna:mcp . +# Run: podman run --rm -i echidna:mcp # stdio MCP transport +# Use: {"mcpServers":{"echidna":{"command":"podman", +# "args":["run","--rm","-i","echidna:mcp"]}}} + +# ============================================================================= +# Stage 1: Rust builder — echidna + echidna-mcp + echidna-wire + typed_wasm +# ============================================================================= +FROM docker.io/library/rust:1.85-slim-bookworm AS rust-builder + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + pkg-config \ + libssl-dev \ + capnproto \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build + +COPY Cargo.toml Cargo.lock ./ +COPY src/rust ./src/rust +COPY src/interfaces ./src/interfaces +COPY crates ./crates +COPY benches ./benches + +# Build both the CLI (echidna) and the MCP server (echidna-mcp). The MCP +# server shells out to the CLI via the ECHIDNA_BIN env var. 
+RUN cargo build --release --bin echidna --bin echidna-mcp + +# ============================================================================= +# Stage 2: MVP prover installer (Debian apt-available set) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS mvp-installer + +RUN apt-get update && apt-get install -y --no-install-recommends \ + z3 \ + cvc5 \ + coq \ + agda-bin \ + metamath \ + spass \ + && rm -rf /var/lib/apt/lists/* + +# ============================================================================= +# Stage 3: Lean 4 installer (elan + stable toolchain) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS lean-installer +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates bash \ + && rm -rf /var/lib/apt/lists/* +RUN curl -fsSL https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh \ + | bash -s -- -y --default-toolchain stable \ + && . 
"$HOME/.elan/env" \ + && lean --version + +# ============================================================================= +# Stage 4: Isabelle 2025 installer (tarball + pre-built Main heap) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS isabelle-installer +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates perl fontconfig \ + && rm -rf /var/lib/apt/lists/* +RUN curl -fsSL --retry 3 \ + "https://isabelle.in.tum.de/dist/Isabelle2025-2_linux.tar.gz" \ + -o /tmp/isabelle.tar.gz \ + && tar -xzf /tmp/isabelle.tar.gz -C /opt/ \ + && rm /tmp/isabelle.tar.gz \ + && rm -rf /opt/Isabelle2025-2/doc \ + && /opt/Isabelle2025-2/bin/isabelle version \ + && /opt/Isabelle2025-2/bin/isabelle build -b Main \ + && rm -rf /root/.isabelle/Isabelle2025-2/log \ + /root/.isabelle/Isabelle2025-2/browser_info + +# ============================================================================= +# Stage 5: F* installer (binary release) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS fstar-installer +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates jq tar gzip \ + && rm -rf /var/lib/apt/lists/* +# Resolve the latest F* Linux x86_64 release tarball via the GitHub API +# (asset naming varies between releases; don't hard-code a version). 
+RUN set -eu; \ + url=$(curl -fsSL https://api.github.com/repos/FStarLang/FStar/releases/latest \ + | jq -r '.assets[]|select(.name|test("[Ll]inux.*x86_64"))|.browser_download_url' \ + | head -1); \ + if [ -n "$url" ]; then \ + curl -fsSL "$url" -o /tmp/fstar.tar.gz \ + && mkdir -p /opt/fstar \ + && tar -xzf /tmp/fstar.tar.gz -C /opt/fstar --strip-components=1 \ + && /opt/fstar/bin/fstar.exe --version || true; \ + else \ + mkdir -p /opt/fstar/bin; \ + echo "WARN: no F* linux asset found; stage empty" >&2; \ + fi + +# ============================================================================= +# Stage 6: Runtime image +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim + +LABEL maintainer="Jonathan D.A. Jewell " +LABEL org.opencontainers.image.source="https://github.com/hyperpolymath/echidna" +LABEL org.opencontainers.image.description="ECHIDNA MCP server — AI-agent-callable theorem proving over stdio" +LABEL org.opencontainers.image.licenses="PMPL-1.0-or-later" +LABEL org.opencontainers.image.version="2.1.0" + +# Runtime deps: the MVP provers + dynamic libs + perl/fontconfig for Isabelle. 
+RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + libgmp10 \ + libstdc++6 \ + perl \ + fontconfig \ + z3 \ + cvc5 \ + coq \ + agda-bin \ + metamath \ + spass \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY --from=rust-builder /build/target/release/echidna /app/bin/echidna +COPY --from=rust-builder /build/target/release/echidna-mcp /app/bin/echidna-mcp +COPY --from=lean-installer /root/.elan /opt/elan +COPY --from=isabelle-installer /opt/Isabelle2025-2 /opt/Isabelle2025-2 +COPY --from=isabelle-installer /root/.isabelle /root/.isabelle +COPY --from=fstar-installer /opt/fstar /opt/fstar + +ENV PATH="/app/bin:/opt/elan/toolchains/stable/bin:/opt/Isabelle2025-2/bin:/opt/fstar/bin:${PATH}" +ENV ECHIDNA_BIN="/app/bin/echidna" + +# Stdio transport — no ports. The client connects via podman run -i. +ENTRYPOINT ["/app/bin/echidna-mcp"] diff --git a/src/rust/provers/abc.rs b/src/rust/provers/abc.rs index da267f9..1a7f8b1 100644 --- a/src/rust/provers/abc.rs +++ b/src/rust/provers/abc.rs @@ -414,20 +414,34 @@ impl ProverBackend for AbcBackend { .context("Failed to read ABC input file")?; let extension = path.extension().and_then(|e| e.to_str()); - self.parse_design(&content, extension) + let mut state = self.parse_design(&content, extension)?; + state.metadata.insert( + "abc_source".to_string(), + serde_json::Value::String(content.clone()), + ); + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { // Without file extension context, try ABC command script first, // then fall back to AIGER ASCII if header starts with "aag"/"aig" let trimmed = content.trim(); - if trimmed.starts_with("aag") || trimmed.starts_with("aig") { - self.parse_design(content, Some("aig")) + let mut state = if trimmed.starts_with("aag") || trimmed.starts_with("aig") { + self.parse_design(content, 
Some("aig"))? } else if trimmed.starts_with(".model") || trimmed.starts_with(".inputs") { - self.parse_design(content, Some("blif")) + self.parse_design(content, Some("blif"))? } else { - self.parse_design(content, None) - } + self.parse_design(content, None)? + }; + state.metadata.insert( + "abc_source".to_string(), + serde_json::Value::String(content.to_string()), + ); + Ok(state) } async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result { @@ -567,6 +581,131 @@ impl ProverBackend for AbcBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Prefer the original source — parsing ABC/BLIF/AIGER into the + // generic Term IR and reconstructing via `to_abc_script` loses + // structure for anything beyond a one-command script. + let method = state + .metadata + .get("abc_method") + .and_then(|v| v.as_str()) + .unwrap_or("pdr"); + let bmc_bound = state + .metadata + .get("bmc_bound") + .and_then(|v| v.as_str()) + .unwrap_or("100"); + let verify_cmd = match method { + "bmc" | "bmc3" => format!("bmc3 -F {}", bmc_bound), + "int" => "int".to_string(), + "dprove" => "dprove".to_string(), + "dsec" => "dsec".to_string(), + "cec" => "cec".to_string(), + _ => "pdr".to_string(), + }; + + if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let p = std::path::Path::new(path); + let read_cmd = match p.extension().and_then(|e| e.to_str()) { + Some("aig") => format!("read_aiger {}", path), + Some("blif") => format!("read_blif {}", path), + Some("v") => format!("read_verilog {}", path), + _ => format!("read {}", path), + }; + let script = format!("{}\n{}\nprint_status\nquit\n", read_cmd, verify_cmd); + let tmp_dir = + tempfile::tempdir().context("Failed to create temporary directory for ABC")?; + let tmp_file = tmp_dir.path().join("verify.abc"); + tokio::fs::write(&tmp_file, &script) + .await + .context("Failed to write temporary ABC script file")?; + let output = tokio::time::timeout( + 
tokio::time::Duration::from_secs(self.config.timeout), + Command::new(&self.config.executable) + .arg("-f") + .arg(&tmp_file) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))? + .context("Failed to execute ABC")?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}\n{}", stdout, stderr); + return self.parse_result(&combined); + } + + if let Some(source) = state.metadata.get("abc_source").and_then(|v| v.as_str()) { + let trimmed = source.trim(); + let (ext, read_cmd_base) = + if trimmed.starts_with("aag") || trimmed.starts_with("aig") { + (".aig", "read_aiger") + } else if trimmed.starts_with(".model") || trimmed.starts_with(".inputs") { + (".blif", "read_blif") + } else { + // Inline script — just execute it directly. + ("", "") + }; + let tmp_dir = + tempfile::tempdir().context("Failed to create temporary directory for ABC")?; + if ext.is_empty() { + let tmp_file = tmp_dir.path().join("verify.abc"); + let script = format!("{}\n{}\nprint_status\nquit\n", source, verify_cmd); + tokio::fs::write(&tmp_file, &script) + .await + .context("Failed to write temporary ABC script file")?; + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout), + Command::new(&self.config.executable) + .arg("-f") + .arg(&tmp_file) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))? 
+ .context("Failed to execute ABC")?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}\n{}", stdout, stderr); + return self.parse_result(&combined); + } else { + let design_file = tmp_dir.path().join(format!("design{}", ext)); + tokio::fs::write(&design_file, source) + .await + .context("Failed to write temporary ABC design file")?; + let script_file = tmp_dir.path().join("verify.abc"); + let script = format!( + "{} {}\n{}\nprint_status\nquit\n", + read_cmd_base, + design_file.display(), + verify_cmd + ); + tokio::fs::write(&script_file, &script) + .await + .context("Failed to write temporary ABC script file")?; + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout), + Command::new(&self.config.executable) + .arg("-f") + .arg(&script_file) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))? + .context("Failed to execute ABC")?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}\n{}", stdout, stderr); + return self.parse_result(&combined); + } + } + + // Fallback: reconstruct an ABC script from the parsed IR. let abc_script = self.to_abc_script(state)?; // ABC can execute commands via -c flag or from a script file. 
diff --git a/src/rust/provers/acl2.rs b/src/rust/provers/acl2.rs index d68fa90..3d563dc 100644 --- a/src/rust/provers/acl2.rs +++ b/src/rust/provers/acl2.rs @@ -1239,7 +1239,12 @@ impl ProverBackend for ACL2Backend { .await .context("Failed to read ACL2 file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -1310,11 +1315,16 @@ impl ProverBackend for ACL2Backend { } } + let mut metadata = HashMap::new(); + metadata.insert( + "acl2_source".to_string(), + serde_json::Value::String(content.to_string()), + ); Ok(ProofState { goals, context, proof_script: vec![], - metadata: HashMap::new(), + metadata, }) } @@ -1380,6 +1390,47 @@ impl ProverBackend for ACL2Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { + let exec = self + .config + .executable + .to_str() + .filter(|s| !s.is_empty()) + .unwrap_or("acl2"); + + // Prefer the original Lisp source — ACL2's `term_to_sexp` + // round-trip is lossy for anything beyond trivial arithmetic. 
+ let source_file: Option = + if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + Some(PathBuf::from(p)) + } else if let Some(src) = state.metadata.get("acl2_source").and_then(|v| v.as_str()) { + let tmp = std::env::temp_dir() + .join(format!("echidna_acl2_verify_{}.lisp", uuid::Uuid::new_v4())); + tokio::fs::write(&tmp, src) + .await + .context("Failed to write temp file")?; + Some(tmp) + } else { + None + }; + + if let Some(path) = source_file { + let stdin_file = std::fs::File::open(&path) + .context("Failed to open source file for ACL2 stdin")?; + let output = Command::new(exec) + .stdin(std::process::Stdio::from(stdin_file)) + .output() + .await + .context("Failed to run ACL2")?; + if state.metadata.get("source_path").is_none() { + let _ = tokio::fs::remove_file(&path).await; + } + let output_str = String::from_utf8_lossy(&output.stdout); + let err_str = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}{}", output_str, err_str); + return Ok(combined.contains("Q.E.D.") + || (combined.contains("Summary") && !combined.contains("FAILED"))); + } + if state.goals.is_empty() { return Ok(true); } @@ -1394,14 +1445,6 @@ impl ProverBackend for ACL2Backend { .await .context("Failed to write temp file")?; - // Run ACL2 in batch mode - let exec = self - .config - .executable - .to_str() - .filter(|s| !s.is_empty()) - .unwrap_or("acl2"); - let stdin_file = std::fs::File::open(&temp_file).context("Failed to open temp file for ACL2 stdin")?; diff --git a/src/rust/provers/agda.rs b/src/rust/provers/agda.rs index 2346ccf..b58c4da 100644 --- a/src/rust/provers/agda.rs +++ b/src/rust/provers/agda.rs @@ -255,7 +255,12 @@ impl ProverBackend for AgdaBackend { let content = tokio::fs::read_to_string(&path) .await .context("Failed to read Agda file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + 
serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -299,11 +304,16 @@ impl ProverBackend for AgdaBackend { context.theorems = theorems; let goals = self.extract_goals(content).await?; + let mut metadata = HashMap::new(); + metadata.insert( + "agda_source".to_string(), + serde_json::Value::String(content.to_string()), + ); Ok(ProofState { goals, context, proof_script: Vec::new(), - metadata: HashMap::new(), + metadata, }) } @@ -349,6 +359,51 @@ impl ProverBackend for AgdaBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Prefer the original .agda file when parse_file stashed its path. + // The previous `state.goals.is_empty() → Ok(true)` shortcut produced + // fake-success — Agda's parser doesn't populate goals the way the + // generic pipeline expects, so genuine files hit it without the + // solver ever running. + // + // Agda checks that the top-level module name matches the file path + // *relative to its CWD*, so we spawn the process from the file's + // parent directory — otherwise every non-trivial file trips + // "module … does not match file name". 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let p = std::path::Path::new(path); + let mut cmd = Command::new(&self.config.executable); + if let Some(parent) = p.parent() { + cmd.current_dir(parent); + } + let output = cmd + .arg(p.file_name().map(std::path::Path::new).unwrap_or(p)) + .output() + .await?; + return Ok(output.status.success()); + } + if let Some(source) = state.metadata.get("agda_source").and_then(|v| v.as_str()) { + let temp_file = std::env::temp_dir() + .join(format!("echidna_agda_verify_{}.agda", uuid::Uuid::new_v4())); + tokio::fs::write(&temp_file, source).await?; + let mut cmd = Command::new(&self.config.executable); + if let Some(parent) = temp_file.parent() { + cmd.current_dir(parent); + } + let output = cmd + .arg( + temp_file + .file_name() + .map(std::path::Path::new) + .unwrap_or(&temp_file), + ) + .output() + .await?; + let _ = tokio::fs::remove_file(&temp_file).await; + return Ok(output.status.success()); + } + + // Fallback: reconstruct from IR (lossy). Only hit for synthetic + // states never backed by real source. 
if state.goals.is_empty() { return Ok(true); } diff --git a/src/rust/provers/altergo.rs b/src/rust/provers/altergo.rs index 60178d3..160cedb 100644 --- a/src/rust/provers/altergo.rs +++ b/src/rust/provers/altergo.rs @@ -112,14 +112,23 @@ impl ProverBackend for AltErgoBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "altergo_source".to_string(), + serde_json::Value::String(content.to_string()), + ); for line in content.lines() { let line = line.trim(); @@ -154,19 +163,36 @@ impl ProverBackend for AltErgoBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let ae_code = self.to_altergo(state)?; - - // Write to temp file since Alt-Ergo prefers file input - let tmp_dir = std::env::temp_dir(); - let tmp_file = tmp_dir.join("echidna_altergo_input.ae"); - tokio::fs::write(&tmp_file, ae_code.as_bytes()) - .await - .context("Failed to write temp file")?; + // Prefer the original .ae source — `to_altergo(state)` round-trips + // Alt-Ergo syntax through the generic Term IR, which mangles + // anything beyond toy inputs. 
+ let (path_to_check, is_temp): (PathBuf, bool) = + if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + (PathBuf::from(p), false) + } else if let Some(src) = state + .metadata + .get("altergo_source") + .and_then(|v| v.as_str()) + { + let tmp = std::env::temp_dir() + .join(format!("echidna_altergo_{}.ae", uuid::Uuid::new_v4())); + tokio::fs::write(&tmp, src) + .await + .context("Failed to write temp file")?; + (tmp, true) + } else { + let ae_code = self.to_altergo(state)?; + let tmp = std::env::temp_dir().join("echidna_altergo_input.ae"); + tokio::fs::write(&tmp, ae_code.as_bytes()) + .await + .context("Failed to write temp file")?; + (tmp, true) + }; let output = tokio::time::timeout( tokio::time::Duration::from_secs(self.config.timeout + 5), Command::new(&self.config.executable) - .arg(&tmp_file) + .arg(&path_to_check) .arg("--timelimit") .arg(format!("{}", self.config.timeout)) .output(), @@ -174,8 +200,9 @@ impl ProverBackend for AltErgoBackend { .await .context("Alt-Ergo timed out")??; - // Clean up temp file - let _ = tokio::fs::remove_file(&tmp_file).await; + if is_temp { + let _ = tokio::fs::remove_file(&path_to_check).await; + } let stdout = String::from_utf8_lossy(&output.stdout); self.parse_result(&stdout) diff --git a/src/rust/provers/coq.rs b/src/rust/provers/coq.rs index e36c81d..0b2408a 100644 --- a/src/rust/provers/coq.rs +++ b/src/rust/provers/coq.rs @@ -834,7 +834,12 @@ impl ProverBackend for CoqBackend { .await .context("Failed to read file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -891,11 +896,16 @@ impl ProverBackend for CoqBackend { } } + let mut metadata = HashMap::new(); + metadata.insert( + "coq_source".to_string(), + serde_json::Value::String(content.to_string()), + 
); Ok(ProofState { goals, context, proof_script: proof_tactics, - metadata: HashMap::new(), + metadata, }) } @@ -922,10 +932,36 @@ impl ProverBackend for CoqBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - // Export the proof and try to check it - let proof_script = self.export(state).await?; + // Prefer the original file when parse_file stashed its path — runs + // coqc on exactly what the user wrote, avoiding the lossy export + // round-trip (the Coq parser's Term IR is incomplete for anything + // beyond trivial syntax). + if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = Command::new("coqc") + .arg(path) + .output() + .await + .context("Failed to run coqc")?; + return Ok(output.status.success()); + } + if let Some(source) = state.metadata.get("coq_source").and_then(|v| v.as_str()) { + let temp_file = std::env::temp_dir() + .join(format!("echidna_coq_verify_{}.v", uuid::Uuid::new_v4())); + tokio::fs::write(&temp_file, source) + .await + .context("Failed to write temp file")?; + let output = Command::new("coqc") + .arg(&temp_file) + .output() + .await + .context("Failed to run coqc")?; + let _ = tokio::fs::remove_file(&temp_file).await; + return Ok(output.status.success()); + } - // Write to temporary file and check with coqc + // Fallback: reconstruct from IR. Lossy; only hit for synthetic states + // that were never parsed from real Coq source. 
+ let proof_script = self.export(state).await?; let temp_file = std::env::temp_dir().join("echidna_coq_verify.v"); tokio::fs::write(&temp_file, proof_script) .await diff --git a/src/rust/provers/cvc5.rs b/src/rust/provers/cvc5.rs index 31aa957..0f53127 100644 --- a/src/rust/provers/cvc5.rs +++ b/src/rust/provers/cvc5.rs @@ -547,11 +547,21 @@ impl ProverBackend for CVC5Backend { let content = tokio::fs::read_to_string(&path) .await .context(format!("Failed to read file: {:?}", path))?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { - self.parse_smtlib_content(content).await + let mut state = self.parse_smtlib_content(content).await?; + state.metadata.insert( + "cvc5_source".to_string(), + serde_json::Value::String(content.to_string()), + ); + Ok(state) } async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result { @@ -603,21 +613,30 @@ impl ProverBackend for CVC5Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { - if state.goals.is_empty() { - return Ok(true); - } - - let mut commands = String::new(); - commands.push_str("(set-logic ALL)\n"); - for var in &state.context.variables { - let ty = self.term_to_smtlib(&var.ty)?; - commands.push_str(&format!("(declare-const {} {})\n", var.name, ty)); - } - for goal in &state.goals { - let smtlib = self.term_to_smtlib(&goal.target)?; - commands.push_str(&format!("(assert (not {}))\n", smtlib)); - } - commands.push_str("(check-sat)\n"); + // Prefer the original SMT-LIB source when parse_file / parse_string + // stashed it. Skipping the Term IR round-trip is the only way to + // keep non-trivial CVC5 inputs intact. 
+ let commands = if let Some(source) = + state.metadata.get("cvc5_source").and_then(|v| v.as_str()) + { + source.to_string() + } else { + if state.goals.is_empty() { + return Ok(true); + } + let mut s = String::new(); + s.push_str("(set-logic ALL)\n"); + for var in &state.context.variables { + let ty = self.term_to_smtlib(&var.ty)?; + s.push_str(&format!("(declare-const {} {})\n", var.name, ty)); + } + for goal in &state.goals { + let smtlib = self.term_to_smtlib(&goal.target)?; + s.push_str(&format!("(assert (not {}))\n", smtlib)); + } + s.push_str("(check-sat)\n"); + s + }; let temp_file = std::env::temp_dir().join(format!("echidna_cvc5_verify_{}.smt2", Uuid::new_v4())); diff --git a/src/rust/provers/eprover.rs b/src/rust/provers/eprover.rs index 75ccfb2..c08a074 100644 --- a/src/rust/provers/eprover.rs +++ b/src/rust/provers/eprover.rs @@ -96,14 +96,23 @@ impl ProverBackend for EProverBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "eprover_source".to_string(), + serde_json::Value::String(content.to_string()), + ); for line in content.lines() { let line = line.trim(); @@ -137,7 +146,37 @@ impl ProverBackend for EProverBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let tptp_code = self.to_tptp(state)?; + // Prefer the original TPTP source when parse_file / parse_string + // stashed it. `to_tptp(state)` round-trips through the generic + // Term IR and silently mangles anything beyond toy axioms. 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout + 5), + Command::new(&self.config.executable) + .arg("--auto") + .arg("--tptp3-format") + .arg("--cpu-limit") + .arg(format!("{}", self.config.timeout)) + .arg("--proof-object=0") + .arg(path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .context("E Prover timed out")??; + let stdout = String::from_utf8_lossy(&output.stdout); + return self.parse_result(&stdout); + } + let tptp_code = if let Some(src) = state + .metadata + .get("eprover_source") + .and_then(|v| v.as_str()) + { + src.to_string() + } else { + self.to_tptp(state)? + }; let mut child = Command::new(&self.config.executable) .arg("--auto") diff --git a/src/rust/provers/hol4.rs b/src/rust/provers/hol4.rs index fb5c236..a401f53 100644 --- a/src/rust/provers/hol4.rs +++ b/src/rust/provers/hol4.rs @@ -2149,7 +2149,16 @@ impl ProverBackend for Hol4Backend { .await .context("Failed to read HOL4 file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + state.metadata.insert( + "hol4_source".to_string(), + serde_json::Value::String(content), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -2245,6 +2254,28 @@ impl ProverBackend for Hol4Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Previously `Ok(state.goals.is_empty())` — fake success without + // ever invoking HOL4. When parse_file stashed the path, feed the + // file to HOL4 via Holmake (the standard batch build driver). 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let p = std::path::Path::new(path); + let mut cmd = Command::new(&self.config.executable); + if let Some(parent) = p.parent() { + cmd.current_dir(parent); + } + cmd.arg("--holdir") + .arg(p.file_name().map(std::path::Path::new).unwrap_or(p)); + let output = cmd.output().await.context("Failed to invoke HOL4")?; + let combined = format!( + "{}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let verified = output.status.success() + && !combined.to_lowercase().contains("exception") + && !combined.to_lowercase().contains("error"); + return Ok(verified); + } Ok(state.goals.is_empty()) } diff --git a/src/rust/provers/metamath.rs b/src/rust/provers/metamath.rs index 7c0d107..542241f 100644 --- a/src/rust/provers/metamath.rs +++ b/src/rust/provers/metamath.rs @@ -287,7 +287,12 @@ impl ProverBackend for MetamathBackend { let content = fs::read_to_string(&path) .await .with_context(|| format!("Failed to read file: {:?}", path))?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -397,13 +402,54 @@ impl ProverBackend for MetamathBackend { async fn verify_proof(&self, state: &ProofState) -> Result { info!("Verifying Metamath proof with {} goals", state.goals.len()); - if !state.goals.is_empty() { - debug!("Proof incomplete: {} goals remaining", state.goals.len()); - return Ok(false); + // Real verification path: shell out to the `metamath` CLI and run + // its `verify proof *` command over the original source. The + // previous stub — returning `Ok(state.goals.is_empty())` — never + // invoked the solver at all. 
+ let source_path = state + .metadata + .get("source_path") + .and_then(|v| v.as_str()) + .map(PathBuf::from); + + if let Some(path) = source_path { + use tokio::io::AsyncWriteExt; + use tokio::process::Command; + + let mut child = Command::new(&self.config.executable) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .context("Failed to spawn metamath")?; + + if let Some(mut stdin) = child.stdin.take() { + let script = format!( + "read \"{}\"\nverify proof *\nexit\n", + path.display() + ); + stdin.write_all(script.as_bytes()).await?; + stdin.flush().await?; + drop(stdin); + } + + let output = child.wait_with_output().await?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}{}", stdout, stderr); + + // metamath prints either "All proofs in the database were + // verified" on success, or "?Error" / "proof is incomplete" + // on failure. + let verified = combined.contains("All proofs in the database were verified") + && !combined.contains("?Error") + && !combined.to_lowercase().contains("proof is incomplete"); + return Ok(verified); } - // All goals satisfied - Ok(true) + // Fallback when no source path was stashed (synthetic ProofState): + // all we can say is "no goals left means OK". 
+ Ok(state.goals.is_empty()) } async fn export(&self, state: &ProofState) -> Result { diff --git a/src/rust/provers/pvs.rs b/src/rust/provers/pvs.rs index 331bf92..6b25fa1 100644 --- a/src/rust/provers/pvs.rs +++ b/src/rust/provers/pvs.rs @@ -2692,7 +2692,16 @@ impl ProverBackend for PVSBackend { .await .context("Failed to read PVS file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + state.metadata.insert( + "pvs_source".to_string(), + serde_json::Value::String(content), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -2824,6 +2833,29 @@ impl ProverBackend for PVSBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // The previous impl was a hollow stub — `Ok(state.goals.is_empty())` + // would happily report SUCCESS on a file that never went near PVS. + // Shell out to PVS in batch mode against the original source when + // it was stashed by parse_file; fall through to the legacy "empty + // goals == ok" shortcut only for synthetic ProofStates. 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = Command::new(&self.config.executable) + .arg("-batch") + .arg("-typecheck") + .arg(path) + .output() + .await + .context("Failed to invoke PVS")?; + let combined = format!( + "{}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let verified = output.status.success() + && !combined.to_lowercase().contains("error") + && !combined.to_lowercase().contains("typecheck failed"); + return Ok(verified); + } Ok(state.goals.is_empty()) } diff --git a/src/rust/provers/spass.rs b/src/rust/provers/spass.rs index 61e318d..33755b9 100644 --- a/src/rust/provers/spass.rs +++ b/src/rust/provers/spass.rs @@ -18,7 +18,6 @@ use anyhow::{Context, Result}; use async_trait::async_trait; use std::path::PathBuf; use std::process::Stdio; -use tokio::io::AsyncWriteExt; use tokio::process::Command; use super::{ProverBackend, ProverConfig, ProverKind}; @@ -105,14 +104,23 @@ impl ProverBackend for SPASSBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "spass_source".to_string(), + serde_json::Value::String(content.to_string()), + ); let mut in_axioms = false; let mut in_conjectures = false; @@ -157,28 +165,47 @@ impl ProverBackend for SPASSBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let dfg_code = self.to_dfg(state)?; - - let mut child = Command::new(&self.config.executable) - .arg("-TimeLimit") - .arg(format!("{}", 
self.config.timeout)) - .arg("-PGiven=0") - .arg("-PProblem=0") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .context("Failed to spawn SPASS process")?; - - if let Some(mut stdin) = child.stdin.take() { - stdin.write_all(dfg_code.as_bytes()).await?; - stdin.flush().await?; - drop(stdin); - } - + // Prefer the original DFG source when parse_file / parse_string + // stashed it. `to_dfg(state)` reconstructs DFG from the parsed + // conjecture list, which silently mangles anything beyond toy + // single-atom formulae. If a real path is available, pass that + // directly; otherwise materialise the stashed source (or the + // lossy reconstruction) to a temp file. + // + // SPASS does not accept the problem on stdin — observed behaviour is + // that the piped stream is treated as extra command-line flags + // (NOTE(review): confirm against the SPASS manual) — so we always + // pass a file path as the final positional argument. + let path_to_check: PathBuf = + if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + PathBuf::from(p) + } else { + let dfg_code = state + .metadata + .get("spass_source") + .and_then(|v| v.as_str()) + .map(ToOwned::to_owned) + .map(Ok) + .unwrap_or_else(|| self.to_dfg(state))?; + let tmp = std::env::temp_dir() + .join(format!("echidna_spass_{}.dfg", uuid::Uuid::new_v4())); + tokio::fs::write(&tmp, dfg_code).await?; + tmp + }; + + // Don't pass -PGiven=0 / -PProblem=0 here: with them set, the + // "SPASS beiseite: Proof found." banner that `parse_result` + // matches on was observed to be suppressed (NOTE(review): the manual + // documents these as output-verbosity flags — re-verify). + // SPASS flag syntax is `-Name=Value` (not `-Name Value`). Passing + // them separated makes SPASS parse the number as a filename and + // exit with "Error in opening file N for reading".
let output = tokio::time::timeout( tokio::time::Duration::from_secs(self.config.timeout + 5), - child.wait_with_output(), + Command::new(&self.config.executable) + .arg(format!("-TimeLimit={}", self.config.timeout)) + .arg(&path_to_check) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), ) .await .context("SPASS timed out")??; diff --git a/src/rust/provers/vampire.rs b/src/rust/provers/vampire.rs index 852f119..9fd03c5 100644 --- a/src/rust/provers/vampire.rs +++ b/src/rust/provers/vampire.rs @@ -102,14 +102,23 @@ impl ProverBackend for VampireBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "vampire_source".to_string(), + serde_json::Value::String(content.to_string()), + ); for line in content.lines() { let line = line.trim(); @@ -143,7 +152,45 @@ impl ProverBackend for VampireBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let tptp_code = self.to_tptp(state)?; + // Prefer the original TPTP source — `to_tptp(state)` round-trips + // through the generic Term IR and silently mangles non-trivial + // inputs. 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout + 5), + Command::new(&self.config.executable) + .arg("--mode") + .arg("casc") + .arg("--input_syntax") + .arg("tptp") + .arg("--time_limit") + .arg(format!("{}", self.config.timeout)) + .arg("--proof") + .arg("off") + .arg("--statistics") + .arg("brief") + .arg(path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .context("Vampire timed out")??; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + return self + .parse_result(&stdout) + .or_else(|_| self.parse_result(&stderr)); + } + let tptp_code = if let Some(src) = state + .metadata + .get("vampire_source") + .and_then(|v| v.as_str()) + { + src.to_string() + } else { + self.to_tptp(state)? + }; let mut child = Command::new(&self.config.executable) .arg("--mode") diff --git a/src/rust/provers/z3.rs b/src/rust/provers/z3.rs index 6f01d69..f740902 100644 --- a/src/rust/provers/z3.rs +++ b/src/rust/provers/z3.rs @@ -236,11 +236,21 @@ impl ProverBackend for Z3Backend { .await .with_context(|| format!("Failed to read file: {:?}", path))?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { - self.parse_smt_file(content) + let mut state = self.parse_smt_file(content)?; + state.metadata.insert( + "z3_source".to_string(), + serde_json::Value::String(content.to_string()), + ); + Ok(state) } async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result { @@ -308,6 +318,15 @@ impl ProverBackend for Z3Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Prefer the original SMT-LIB source when it 
was stashed by + // parse_file / parse_string. Running Z3 on the raw user input + // avoids the lossy parse → Term IR → term_to_smt round-trip, + // which silently corrupts anything beyond trivial inputs. + if let Some(source) = state.metadata.get("z3_source").and_then(|v| v.as_str()) { + let result = self.execute_command(source).await?; + return Ok(matches!(result, SmtResult::Unsat)); + } + if state.goals.is_empty() { return Ok(true); } @@ -326,17 +345,17 @@ impl ProverBackend for Z3Backend { return Ok(false); } - // Build complete SMT-LIB query with variable declarations + // Fallback: reconstruct an SMT-LIB query from the parsed IR. + // Lossy for anything complex; only reached when no raw source + // was captured (e.g. synthetic ProofState built at runtime). let mut commands = String::new(); commands.push_str("(set-logic ALL)\n"); - // Include variable declarations from context for var in &state.context.variables { let ty_smt = self.term_to_smt(&var.ty); commands.push_str(&format!("(declare-const {} {})\n", var.name, ty_smt)); } - // Assert negation of each goal (if unsat, goal is valid) for goal in &state.goals { let smt_goal = self.term_to_smt(&goal.target); commands.push_str(&format!("(assert (not {}))\n", smt_goal));