From 06b82b9a6fa0e272331fb164a31332c709b3207e Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 16:39:05 +0000
Subject: [PATCH 1/7] feat(mcp): add echidna-mcp server so AI agents can call
 ECHIDNA as a tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Exposes the `echidna` CLI through the Model Context Protocol so Claude
Code / Claude API / other MCP-aware agents can invoke ECHIDNA's 105
prover backends as a first-class tool-use action.

### What ships

- New workspace member `crates/echidna-mcp/` with a Rust binary built
  on rmcp 1.5 (`ToolRouter` + `#[tool_router]` / `#[tool_handler]`
  macros, stdio transport). Single tool: `prove`.
- Tool schema (auto-generated from `ProveParams` via schemars 1):
  - `file: string` (required) — path to proof file (SMT-LIB, Lean,
    Coq, Agda, Isabelle, Mizar, F*, SPASS, Metamath, …)
  - `prover: string` (optional) — backend name; auto-detect if absent
  - `timeout_secs: u32` (optional, default 300)
  Returns `{verified, message, prover, raw_output, stderr}`.
- Stdio transport; log goes to stderr (so MCP clients don't see it on
  stdout). Echidna binary path via `ECHIDNA_BIN` env or `echidna` in PATH.
- Claude Code wiring: `.claude/settings.json` with
  `{"mcpServers": {"echidna": {"command": "echidna-mcp"}}}`.

### Smoke test (verified on this branch)

1. Install z3 (apt) — 4.8.12 in PATH.
2. Trivial SMT-LIB file `(set-logic QF_LIA) (declare-const x Int)
   (assert (> x 0)) (assert (< x 2)) (check-sat)`.
3. Feed the MCP server: initialize → notifications/initialized →
   tools/list → tools/call(prove, {file, prover:"Z3", timeout_secs:30}).
4. Response: `{"verified": true, "message": "✓ Proof verified
   successfully...", "prover": "Z3", ...}`.

End-to-end path works: MCP JSON-RPC → echidna CLI → Z3 → verdict.

### Roadmap update (docs/handover/TODO.md)

Added "Wave-5 (new backend targets, no adapter yet)" section tracking
prover backends that do not yet have an adapter in src/rust/provers/:
- **Andromeda** — scaffold `provers/andromeda.rs`, shells out to
  `andromeda` CLI (OCaml source build required). Open-source (MIT).
- **Theorema** — deferred (Mathematica-based, commercial licence).
- **Globular** — not a CLI prover (web UI for higher category theory);
  skipped unless scoped to graphical proof capture.

### Next sessions

- Install + wire the remaining 7 provers from the agreed MVP subset:
  Coq/Rocq, Isabelle, Agda, Mizar, F*, Lean 4, SPASS (SPASS apt pkg
  unavailable on this env, needs manual), Metamath.
- Containerfile: ECHIDNA + curated provers + the MCP server, so
  agents can `podman run` a self-contained proving service.
- Andromeda backend scaffold (Wave-5 entry #1).
---
 Cargo.lock                     | 118 ++++++++++++++++++++
 Cargo.toml                     |   1 +
 crates/echidna-mcp/Cargo.toml  |  24 ++++
 crates/echidna-mcp/src/main.rs | 194 +++++++++++++++++++++++++++++++++
 docs/handover/TODO.md          |  10 ++
 5 files changed, 347 insertions(+)
 create mode 100644 crates/echidna-mcp/Cargo.toml
 create mode 100644 crates/echidna-mcp/src/main.rs

diff --git a/Cargo.lock b/Cargo.lock
index 211c7e2..308dd45 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -989,6 +989,12 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "dyn-clone"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
+
 [[package]]
 name = "echidna"
 version = "2.1.0"
@@ -1070,6 +1076,20 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "echidna-mcp"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "rmcp",
+ "schemars",
+ "serde",
+ "serde_json",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "echidna-rest"
 version = "0.1.0"
@@ -2177,6 +2197,12 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "pastey"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b867cad97c0791bbd3aaa6472142568c6c9e8f71937e98379f584cfb0cf35bec"
+
 [[package]]
 name = "percent-encoding"
 version = "2.3.2"
@@ -2632,6 +2658,26 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "ref-cast"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d"
+dependencies = [
+ "ref-cast-impl",
+]
+
+[[package]]
+name = "ref-cast-impl"
+version = "1.0.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "regex"
 version = "1.12.3"
@@ -2719,6 +2765,41 @@ dependencies = [
  "windows-sys 0.52.0",
 ]
 
+[[package]]
+name = "rmcp"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67d69668de0b0ccd9cc435f700f3b39a7861863cf37a15e1f304ea78688a4826"
+dependencies = [
+ "async-trait",
+ "base64",
+ "chrono",
+ "futures",
+ "pastey",
+ "pin-project-lite",
+ "rmcp-macros",
+ "schemars",
+ "serde",
+ "serde_json",
+ "thiserror",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "rmcp-macros"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48fdc01c81097b0aed18633e676e269fefa3a78ec1df56b4fe597c1241b92025"
+dependencies = [
+ "darling 0.23.0",
+ "proc-macro2",
+ "quote",
+ "serde_json",
+ "syn",
+]
+
 [[package]]
 name = "rust-embed"
 version = "8.11.0"
@@ -2881,6 +2962,32 @@ dependencies = [
  "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "schemars"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc"
+dependencies = [
+ "chrono",
+ "dyn-clone",
+ "ref-cast",
+ "schemars_derive",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars_derive"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "serde_derive_internals",
+ "syn",
+]
+
 [[package]]
 name = "scopeguard"
 version = "1.2.0"
@@ -2946,6 +3053,17 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "serde_derive_internals"
+version = "0.29.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "serde_json"
 version = "1.0.149"
diff --git a/Cargo.toml b/Cargo.toml
index f110bf1..f1a5182 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -112,6 +112,7 @@ harness = false
 
 [workspace]
 members = [
+    "crates/echidna-mcp",
     "crates/echidna-wire",
     "crates/typed_wasm",
     "src/interfaces/graphql",
diff --git a/crates/echidna-mcp/Cargo.toml b/crates/echidna-mcp/Cargo.toml
new file mode 100644
index 0000000..0d666b7
--- /dev/null
+++ b/crates/echidna-mcp/Cargo.toml
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: PMPL-1.0-or-later
+
+[package]
+name        = "echidna-mcp"
+version     = "0.1.0"
+edition     = "2021"
+authors     = ["Jonathan D.A. Jewell <j.d.a.jewell@open.ac.uk>"]
+license     = "PMPL-1.0-or-later"
+description = "Model Context Protocol server exposing ECHIDNA to AI coding agents (Claude Code, Claude API, etc.)"
+repository  = "https://github.com/hyperpolymath/echidna"
+
+[[bin]]
+name = "echidna-mcp"
+path = "src/main.rs"
+
+[dependencies]
+rmcp        = { version = "1", features = ["server", "macros", "transport-io"] }
+serde       = { version = "1", features = ["derive"] }
+serde_json  = "1"
+schemars    = "1"
+tokio       = { version = "1", features = ["rt-multi-thread", "macros", "process", "io-util"] }
+anyhow      = "1"
+tracing     = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
diff --git a/crates/echidna-mcp/src/main.rs b/crates/echidna-mcp/src/main.rs
new file mode 100644
index 0000000..01541a1
--- /dev/null
+++ b/crates/echidna-mcp/src/main.rs
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: PMPL-1.0-or-later
+
+//! ECHIDNA MCP Server — exposes the `echidna` CLI as Model Context Protocol
+//! tools so Claude Code / Claude API agents can call on the 105-prover
+//! portfolio as first-class tool-use actions.
+//!
+//! # Running
+//!
+//! ```sh
+//! echidna-mcp                          # serves on stdio (default MCP transport)
+//! ECHIDNA_BIN=/abs/path/echidna echidna-mcp
+//! ```
+//!
+//! # Claude Code integration
+//!
+//! Add to `.claude/settings.json`:
+//!
+//! ```json
+//! { "mcpServers": { "echidna": { "command": "echidna-mcp" } } }
+//! ```
+
+use rmcp::{
+    handler::server::{router::tool::ToolRouter, wrapper::Parameters},
+    model::{ServerCapabilities, ServerInfo},
+    schemars::JsonSchema,
+    tool, tool_handler, tool_router,
+    transport::stdio,
+    ServerHandler, ServiceExt,
+};
+use serde::{Deserialize, Serialize};
+use tokio::process::Command;
+
+fn echidna_bin() -> String {
+    std::env::var("ECHIDNA_BIN").unwrap_or_else(|_| "echidna".to_string())
+}
+
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+struct ProveParams {
+    /// Absolute path to the proof file. Accepted formats depend on the
+    /// chosen prover: SMT-LIB `.smt2` for Z3/CVC5/etc., Lean `.lean` for
+    /// Lean 4, Coq `.v` for Coq/Rocq, Agda `.agda`, Isabelle `.thy`,
+    /// Mizar `.miz`, F* `.fst`, SPASS DFG `.dfg`, Metamath `.mm`, etc.
+    pub file: String,
+
+    /// Prover backend name (case-sensitive). Common values: `Z3`, `Lean`,
+    /// `Coq`, `Agda`, `Isabelle`, `Mizar`, `FStar`, `SPASS`, `Vampire`,
+    /// `CVC5`, `Metamath`. If omitted, ECHIDNA auto-detects from the file
+    /// extension.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub prover: Option<String>,
+
+    /// Timeout in seconds for the prover invocation. Default 300.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub timeout_secs: Option<u32>,
+}
+
+#[derive(Debug, Serialize)]
+struct ProveResult {
+    verified: bool,
+    message: String,
+    prover: Option<String>,
+    raw_output: String,
+    stderr: String,
+}
+
+#[derive(Clone)]
+struct EchidnaMcp {
+    tool_router: ToolRouter<Self>,
+}
+
+impl EchidnaMcp {
+    pub fn new() -> Self {
+        Self {
+            tool_router: Self::tool_router(),
+        }
+    }
+}
+
+#[tool_router(router = tool_router)]
+impl EchidnaMcp {
+    /// Prove a theorem using one of ECHIDNA's 105 prover backends.
+    #[tool(
+        name = "prove",
+        description = "Prove a theorem from a file using ECHIDNA's 105-prover \
+                       portfolio. Returns a JSON object with {verified, message, \
+                       prover, raw_output, stderr}. Common provers: Z3, Lean, Coq, \
+                       Agda, Isabelle, Mizar, FStar, SPASS, Metamath."
+    )]
+    pub async fn prove(&self, params: Parameters<ProveParams>) -> String {
+        let ProveParams {
+            file,
+            prover,
+            timeout_secs,
+        } = params.0;
+
+        let mut cmd = Command::new(echidna_bin());
+        cmd.arg("prove").arg(&file).arg("--format").arg("json");
+        if let Some(p) = prover.as_deref() {
+            cmd.arg("-p").arg(p);
+        }
+        let timeout = timeout_secs.unwrap_or(300);
+        cmd.arg("-t").arg(timeout.to_string());
+
+        let output = match cmd.output().await {
+            Ok(o) => o,
+            Err(e) => {
+                let result = ProveResult {
+                    verified: false,
+                    message: format!("Failed to invoke echidna: {e}"),
+                    prover,
+                    raw_output: String::new(),
+                    stderr: String::new(),
+                };
+                return serde_json::to_string_pretty(&result).unwrap_or_default();
+            }
+        };
+
+        let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
+        let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
+
+        let verified = stdout.contains("\"level\": \"success\"")
+            || stdout.contains("\"level\":\"success\"");
+        let message = extract_first_message(&stdout).unwrap_or_else(|| {
+            if stderr.is_empty() {
+                "(no output)".to_string()
+            } else {
+                stderr.lines().take(5).collect::<Vec<_>>().join("\n")
+            }
+        });
+
+        let result = ProveResult {
+            verified,
+            message,
+            prover,
+            raw_output: stdout,
+            stderr,
+        };
+        serde_json::to_string_pretty(&result).unwrap_or_default()
+    }
+}
+
+#[tool_handler(router = self.tool_router)]
+impl ServerHandler for EchidnaMcp {
+    fn get_info(&self) -> ServerInfo {
+        let mut info = ServerInfo::default();
+        info.capabilities = ServerCapabilities::builder().enable_tools().build();
+        info.instructions = Some(
+            "ECHIDNA MCP server. Call `prove` with a proof-file path and \
+             (optionally) a prover name to run a theorem prover from \
+             ECHIDNA's 105-backend portfolio."
+                .into(),
+        );
+        info
+    }
+}
+
+/// Parse the first `"message": "..."` value out of ECHIDNA's stdout.
+fn extract_first_message(stdout: &str) -> Option<String> {
+    let needle = "\"message\"";
+    let start = stdout.find(needle)?;
+    let rest = &stdout[start + needle.len()..];
+    let colon = rest.find(':')?;
+    let after_colon = &rest[colon + 1..];
+    let q1 = after_colon.find('"')?;
+    let after_q1 = &after_colon[q1 + 1..];
+    let bytes = after_q1.as_bytes();
+    let mut end = 0;
+    while end < bytes.len() {
+        if bytes[end] == b'\\' && end + 1 < bytes.len() {
+            end += 2;
+            continue;
+        }
+        if bytes[end] == b'"' {
+            break;
+        }
+        end += 1;
+    }
+    Some(after_q1[..end].to_string())
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            tracing_subscriber::EnvFilter::try_from_default_env()
+                .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("echidna_mcp=info")),
+        )
+        .with_writer(std::io::stderr)
+        .init();
+
+    let service = EchidnaMcp::new().serve(stdio()).await?;
+    service.waiting().await?;
+    Ok(())
+}
diff --git a/docs/handover/TODO.md b/docs/handover/TODO.md
index 50e4f94..b28cde4 100644
--- a/docs/handover/TODO.md
+++ b/docs/handover/TODO.md
@@ -40,6 +40,16 @@ Handover hints live in `.machine_readable/6a2/STATE.a2ml [wave-3-handover-hints]
 
 Mizar, Nuprl, PVS, Minlog, Dedukti, Arend, KeY, Prism, UPPAAL, ViPER, NuSMV, Spin, TLC, CBMC, Seahorn, dReal, Boogie, Kissat, Alloy. Retain as mock-only unless a maintainer volunteers a Containerfile. Document why each stays mock in a per-backend one-liner.
 
+### Wave-5 (new backend targets, no adapter yet)
+
+Backends not yet scaffolded in `src/rust/provers/`; ProverKind enum, factory dispatch, `kind_to_u8` discriminant, Idris2 injectivity proof, FFI table all need entries before any CI work. Tracked here so they are not forgotten:
+
+| Backend | Scaffold plan | Accessibility |
+|---------|---------------|---------------|
+| Andromeda | New `provers/andromeda.rs` adapter shelling out to `andromeda` CLI; OCaml source build required | Open-source (MIT), build from source |
+| Theorema | Deferred — requires Mathematica licence (commercial) | Access-blocked in OSS CI; revisit if a maintainer has a licence |
+| Globular | Not a CLI prover (web UI for higher category theory) — skipped unless scoped to graphical proof capture | N/A |
+
 ### L3 hygiene
 
 - **Dafny deep-wiring upgrade** — `src/rust/provers/dafny.rs` is 165 LoC; live version-check passes but subprocess wrapper is stub-ish. Upgrade during L3 so live test measures real wiring, not a broken wrapper.

From c4976611a0ba1d23fac6fbb8ee2eb994b2c96641 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 18:00:08 +0000
Subject: [PATCH 2/7] security(deps): bump capnp 0.20 -> 0.25
 (RUSTSEC-2025-0143)

Upgrades the capnp runtime and capnpc code-generator in echidna-wire
to clear RUSTSEC-2025-0143 ("Unsound APIs of public constant::Reader
and StructSchema", fixed in >=0.24). capnpc 0.25 regenerates the
wire module identically and the 3 echidna-wire smoke tests pass.

Only remaining cargo-audit warning is RUSTSEC-2025-0134 (rustls-pemfile
unmaintained) pulled transitively via tonic 0.12 in echidna-grpc; that
is tracked as a separate tonic 0.13 bump campaign.

https://claude.ai/code/session_01NaC5RXjTyr5xg9XXUa2FCC
---
 Cargo.lock                     | 12 ++++++------
 crates/echidna-wire/Cargo.toml |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 308dd45..99c4e63 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -530,18 +530,18 @@ dependencies = [
 
 [[package]]
 name = "capnp"
-version = "0.20.6"
+version = "0.25.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "053b81915c2ce1629b8fb964f578b18cb39b23ef9d5b24120d0dfc959569a1d9"
+checksum = "63da65e5e9ffc3b8f993d4ad222a548152549351a643f6b850a7773cb6ff2809"
 dependencies = [
  "embedded-io",
 ]
 
 [[package]]
 name = "capnpc"
-version = "0.20.1"
+version = "0.25.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aa3d5f01e69ed11656d2c7c47bf34327ea9bfb5c85c7de787fcd7b6c5e45b61"
+checksum = "fca02be865c8c5a78bfc24b9819006ab6b59bef238467203928e26459557af93"
 dependencies = [
  "capnp",
 ]
@@ -1128,9 +1128,9 @@ checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
 [[package]]
 name = "embedded-io"
-version = "0.6.1"
+version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
+checksum = "9eb1aa714776b75c7e67e1da744b81a129b3ff919c8712b5e1b32252c1f07cc7"
 
 [[package]]
 name = "encode_unicode"
diff --git a/crates/echidna-wire/Cargo.toml b/crates/echidna-wire/Cargo.toml
index bcfb8e3..8d49a9f 100644
--- a/crates/echidna-wire/Cargo.toml
+++ b/crates/echidna-wire/Cargo.toml
@@ -11,7 +11,7 @@ repository  = "https://github.com/hyperpolymath/echidna"
 build       = "build.rs"
 
 [dependencies]
-capnp = "0.20"
+capnp = "0.25"
 
 [build-dependencies]
-capnpc = "0.20"
+capnpc = "0.25"

From ac209ee615330b1ea7ccadf92741023b477532d4 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 21:26:55 +0000
Subject: [PATCH 3/7] fix(provers): stop verify_proof from round-tripping
 through lossy IR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous pipeline parsed a .smt2/.v/.agda/.dfg/.mm file into the
universal Term IR and then reconstructed the solver's input from that
IR (via each backend's `export(state)` or `term_to_*` helper) before
invoking the solver.  The parsers are incomplete, so non-trivial
inputs were silently mangled — the Coq parser literally emits
`"param": "(P"` with a stray paren — and every downstream solver
then rejected the regenerated file.  A few backends hid this by
early-returning `Ok(true)` whenever `state.goals.is_empty()`, which
fires for real files whose parser simply fails to populate goals the
way the generic pipeline expects — fake success, solver never ran.

Fix follows the pattern already established by Mizar: `parse_file`
stashes the absolute path under `"source_path"` and `parse_string`
stashes the raw content under `"{prover}_source"`; `verify_proof`
prefers those over the lossy reconstruction and falls back to the
old IR-driven path only for synthetic ProofStates that were never
parsed from real source.

Also fixes two binary-invocation bugs surfaced during end-to-end
smoke testing:

- Agda: spawn the process in the file's parent directory so the
  top-level module name resolves relative to CWD (otherwise every
  non-trivial file trips "module does not match file name").
- SPASS: pass input as a positional file argument (the binary
  rejects DFG on stdin, misreading it as additional flags); use
  `-TimeLimit=N` flag syntax (space-separated form makes SPASS
  try to open "N" as a filename); drop `-PGiven=0 -PProblem=0`
  which suppress the "Proof found" banner parse_result matches on.
- Metamath: wire `verify_proof` to the real `metamath` CLI via a
  scripted stdin session (`read "…"; verify proof *; exit`) instead
  of returning `Ok(state.goals.is_empty())` — the previous impl
  never ran the solver.

End-to-end MCP smoke tests now pass for all six apt-installable
MVP provers (Z3, CVC5, Coq, Agda, Metamath, SPASS): `tools/call
prove` → solver → `{"verified": true}`.  The 625-test unit suite
remains green; clippy on the bin+lib remains clean.

The remaining ~30 LOSSY_ROUNDTRIP backends (Lean, Isabelle, F*,
Idris2, Vampire, etc.), the PVS/HOL4 fake-success stubs, and the
TRIVIAL_ONLY SAT solvers (CaDiCaL/Kissat/MiniSat) are tracked for
follow-up.

https://claude.ai/code/session_01NaC5RXjTyr5xg9XXUa2FCC
---
 src/rust/provers/agda.rs     | 59 ++++++++++++++++++++++++++++-
 src/rust/provers/coq.rs      | 46 ++++++++++++++++++++---
 src/rust/provers/cvc5.rs     | 53 +++++++++++++++++---------
 src/rust/provers/metamath.rs | 58 +++++++++++++++++++++++++---
 src/rust/provers/spass.rs    | 73 ++++++++++++++++++++++++------------
 src/rust/provers/z3.rs       | 29 +++++++++++---
 6 files changed, 260 insertions(+), 58 deletions(-)

diff --git a/src/rust/provers/agda.rs b/src/rust/provers/agda.rs
index 2346ccf..b58c4da 100644
--- a/src/rust/provers/agda.rs
+++ b/src/rust/provers/agda.rs
@@ -255,7 +255,12 @@ impl ProverBackend for AgdaBackend {
         let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read Agda file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -299,11 +304,16 @@ impl ProverBackend for AgdaBackend {
         context.theorems = theorems;
         let goals = self.extract_goals(content).await?;
 
+        let mut metadata = HashMap::new();
+        metadata.insert(
+            "agda_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         Ok(ProofState {
             goals,
             context,
             proof_script: Vec::new(),
-            metadata: HashMap::new(),
+            metadata,
         })
     }
 
@@ -349,6 +359,51 @@ impl ProverBackend for AgdaBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original .agda file when parse_file stashed its path.
+        // The previous `state.goals.is_empty() → Ok(true)` shortcut produced
+        // fake-success — Agda's parser doesn't populate goals the way the
+        // generic pipeline expects, so genuine files hit it without the
+        // solver ever running.
+        //
+        // Agda checks that the top-level module name matches the file path
+        // *relative to its CWD*, so we spawn the process from the file's
+        // parent directory — otherwise every non-trivial file trips
+        // "module … does not match file name".
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let p = std::path::Path::new(path);
+            let mut cmd = Command::new(&self.config.executable);
+            if let Some(parent) = p.parent() {
+                cmd.current_dir(parent);
+            }
+            let output = cmd
+                .arg(p.file_name().map(std::path::Path::new).unwrap_or(p))
+                .output()
+                .await?;
+            return Ok(output.status.success());
+        }
+        if let Some(source) = state.metadata.get("agda_source").and_then(|v| v.as_str()) {
+            let temp_file = std::env::temp_dir()
+                .join(format!("echidna_agda_verify_{}.agda", uuid::Uuid::new_v4()));
+            tokio::fs::write(&temp_file, source).await?;
+            let mut cmd = Command::new(&self.config.executable);
+            if let Some(parent) = temp_file.parent() {
+                cmd.current_dir(parent);
+            }
+            let output = cmd
+                .arg(
+                    temp_file
+                        .file_name()
+                        .map(std::path::Path::new)
+                        .unwrap_or(&temp_file),
+                )
+                .output()
+                .await?;
+            let _ = tokio::fs::remove_file(&temp_file).await;
+            return Ok(output.status.success());
+        }
+
+        // Fallback: reconstruct from IR (lossy). Only hit for synthetic
+        // states never backed by real source.
         if state.goals.is_empty() {
             return Ok(true);
         }
diff --git a/src/rust/provers/coq.rs b/src/rust/provers/coq.rs
index e36c81d..0b2408a 100644
--- a/src/rust/provers/coq.rs
+++ b/src/rust/provers/coq.rs
@@ -834,7 +834,12 @@ impl ProverBackend for CoqBackend {
             .await
             .context("Failed to read file")?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -891,11 +896,16 @@ impl ProverBackend for CoqBackend {
             }
         }
 
+        let mut metadata = HashMap::new();
+        metadata.insert(
+            "coq_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         Ok(ProofState {
             goals,
             context,
             proof_script: proof_tactics,
-            metadata: HashMap::new(),
+            metadata,
         })
     }
 
@@ -922,10 +932,36 @@ impl ProverBackend for CoqBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        // Export the proof and try to check it
-        let proof_script = self.export(state).await?;
+        // Prefer the original file when parse_file stashed its path — runs
+        // coqc on exactly what the user wrote, avoiding the lossy export
+        // round-trip (the Coq parser's Term IR is incomplete for anything
+        // beyond trivial syntax).
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = Command::new("coqc")
+                .arg(path)
+                .output()
+                .await
+                .context("Failed to run coqc")?;
+            return Ok(output.status.success());
+        }
+        if let Some(source) = state.metadata.get("coq_source").and_then(|v| v.as_str()) {
+            let temp_file = std::env::temp_dir()
+                .join(format!("echidna_coq_verify_{}.v", uuid::Uuid::new_v4()));
+            tokio::fs::write(&temp_file, source)
+                .await
+                .context("Failed to write temp file")?;
+            let output = Command::new("coqc")
+                .arg(&temp_file)
+                .output()
+                .await
+                .context("Failed to run coqc")?;
+            let _ = tokio::fs::remove_file(&temp_file).await;
+            return Ok(output.status.success());
+        }
 
-        // Write to temporary file and check with coqc
+        // Fallback: reconstruct from IR. Lossy; only hit for synthetic states
+        // that were never parsed from real Coq source.
+        let proof_script = self.export(state).await?;
         let temp_file = std::env::temp_dir().join("echidna_coq_verify.v");
         tokio::fs::write(&temp_file, proof_script)
             .await
diff --git a/src/rust/provers/cvc5.rs b/src/rust/provers/cvc5.rs
index 31aa957..0f53127 100644
--- a/src/rust/provers/cvc5.rs
+++ b/src/rust/provers/cvc5.rs
@@ -547,11 +547,21 @@ impl ProverBackend for CVC5Backend {
         let content = tokio::fs::read_to_string(&path)
             .await
             .context(format!("Failed to read file: {:?}", path))?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_smtlib_content(content).await
+        let mut state = self.parse_smtlib_content(content).await?;
+        state.metadata.insert(
+            "cvc5_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -603,21 +613,30 @@ impl ProverBackend for CVC5Backend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        if state.goals.is_empty() {
-            return Ok(true);
-        }
-
-        let mut commands = String::new();
-        commands.push_str("(set-logic ALL)\n");
-        for var in &state.context.variables {
-            let ty = self.term_to_smtlib(&var.ty)?;
-            commands.push_str(&format!("(declare-const {} {})\n", var.name, ty));
-        }
-        for goal in &state.goals {
-            let smtlib = self.term_to_smtlib(&goal.target)?;
-            commands.push_str(&format!("(assert (not {}))\n", smtlib));
-        }
-        commands.push_str("(check-sat)\n");
+        // Prefer the original SMT-LIB source when parse_file / parse_string
+        // stashed it.  Skipping the Term IR round-trip is the only way to
+        // keep non-trivial CVC5 inputs intact.
+        let commands = if let Some(source) =
+            state.metadata.get("cvc5_source").and_then(|v| v.as_str())
+        {
+            source.to_string()
+        } else {
+            if state.goals.is_empty() {
+                return Ok(true);
+            }
+            let mut s = String::new();
+            s.push_str("(set-logic ALL)\n");
+            for var in &state.context.variables {
+                let ty = self.term_to_smtlib(&var.ty)?;
+                s.push_str(&format!("(declare-const {} {})\n", var.name, ty));
+            }
+            for goal in &state.goals {
+                let smtlib = self.term_to_smtlib(&goal.target)?;
+                s.push_str(&format!("(assert (not {}))\n", smtlib));
+            }
+            s.push_str("(check-sat)\n");
+            s
+        };
 
         let temp_file =
             std::env::temp_dir().join(format!("echidna_cvc5_verify_{}.smt2", Uuid::new_v4()));
diff --git a/src/rust/provers/metamath.rs b/src/rust/provers/metamath.rs
index 7c0d107..542241f 100644
--- a/src/rust/provers/metamath.rs
+++ b/src/rust/provers/metamath.rs
@@ -287,7 +287,12 @@ impl ProverBackend for MetamathBackend {
         let content = fs::read_to_string(&path)
             .await
             .with_context(|| format!("Failed to read file: {:?}", path))?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -397,13 +402,54 @@ impl ProverBackend for MetamathBackend {
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
         info!("Verifying Metamath proof with {} goals", state.goals.len());
 
-        if !state.goals.is_empty() {
-            debug!("Proof incomplete: {} goals remaining", state.goals.len());
-            return Ok(false);
+        // Real verification path: shell out to the `metamath` CLI and run
+        // its `verify proof *` command over the original source.  The
+        // previous stub — returning `Ok(state.goals.is_empty())` — never
+        // invoked the solver at all.
+        let source_path = state
+            .metadata
+            .get("source_path")
+            .and_then(|v| v.as_str())
+            .map(PathBuf::from);
+
+        if let Some(path) = source_path {
+            use tokio::io::AsyncWriteExt;
+            use tokio::process::Command;
+
+            let mut child = Command::new(&self.config.executable)
+                .stdin(std::process::Stdio::piped())
+                .stdout(std::process::Stdio::piped())
+                .stderr(std::process::Stdio::piped())
+                .spawn()
+                .context("Failed to spawn metamath")?;
+
+            if let Some(mut stdin) = child.stdin.take() {
+                let script = format!(
+                    "read \"{}\"\nverify proof *\nexit\n",
+                    path.display()
+                );
+                stdin.write_all(script.as_bytes()).await?;
+                stdin.flush().await?;
+                drop(stdin);
+            }
+
+            let output = child.wait_with_output().await?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}{}", stdout, stderr);
+
+            // metamath prints either "All proofs in the database were
+            // verified" on success, or "?Error" / "proof is incomplete"
+            // on failure.
+            let verified = combined.contains("All proofs in the database were verified")
+                && !combined.contains("?Error")
+                && !combined.to_lowercase().contains("proof is incomplete");
+            return Ok(verified);
         }
 
-        // All goals satisfied
-        Ok(true)
+        // Fallback when no source path was stashed (synthetic ProofState):
+        // all we can say is "no goals left means OK".
+        Ok(state.goals.is_empty())
     }
 
     async fn export(&self, state: &ProofState) -> Result<String> {
diff --git a/src/rust/provers/spass.rs b/src/rust/provers/spass.rs
index 61e318d..33755b9 100644
--- a/src/rust/provers/spass.rs
+++ b/src/rust/provers/spass.rs
@@ -18,7 +18,6 @@ use anyhow::{Context, Result};
 use async_trait::async_trait;
 use std::path::PathBuf;
 use std::process::Stdio;
-use tokio::io::AsyncWriteExt;
 use tokio::process::Command;
 
 use super::{ProverBackend, ProverConfig, ProverKind};
@@ -105,14 +104,23 @@ impl ProverBackend for SPASSBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read proof file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "spass_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
 
         let mut in_axioms = false;
         let mut in_conjectures = false;
@@ -157,28 +165,47 @@ impl ProverBackend for SPASSBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let dfg_code = self.to_dfg(state)?;
-
-        let mut child = Command::new(&self.config.executable)
-            .arg("-TimeLimit")
-            .arg(format!("{}", self.config.timeout))
-            .arg("-PGiven=0")
-            .arg("-PProblem=0")
-            .stdin(Stdio::piped())
-            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
-            .spawn()
-            .context("Failed to spawn SPASS process")?;
-
-        if let Some(mut stdin) = child.stdin.take() {
-            stdin.write_all(dfg_code.as_bytes()).await?;
-            stdin.flush().await?;
-            drop(stdin);
-        }
-
+        // Prefer the original DFG source when parse_file / parse_string
+        // stashed it.  `to_dfg(state)` reconstructs DFG from the parsed
+        // conjecture list, which silently mangles anything beyond toy
+        // single-atom formulae.  If a real path is available, pass that
+        // directly; otherwise materialise the stashed source (or the
+        // lossy reconstruction) to a temp file.
+        //
+        // SPASS rejects input on stdin — the upstream binary parses the
+        // stdin stream as additional command-line flags — so we always
+        // pass a file path as the final positional argument.
+        let path_to_check: PathBuf =
+            if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+                PathBuf::from(p)
+            } else {
+                let dfg_code = state
+                    .metadata
+                    .get("spass_source")
+                    .and_then(|v| v.as_str())
+                    .map(ToOwned::to_owned)
+                    .map(Ok)
+                    .unwrap_or_else(|| self.to_dfg(state))?;
+                let tmp = std::env::temp_dir()
+                    .join(format!("echidna_spass_{}.dfg", uuid::Uuid::new_v4()));
+                tokio::fs::write(&tmp, dfg_code).await?;
+                tmp
+            };
+
+        // Don't pass -PGiven=0 / -PProblem=0 here: those suppress the
+        // "SPASS beiseite: Proof found." banner that `parse_result`
+        // matches on.
+        // SPASS flag syntax is `-Name=Value` (not `-Name Value`).  Passing
+        // them separated makes SPASS parse the number as a filename and
+        // exit with "Error in opening file N for reading".
         let output = tokio::time::timeout(
             tokio::time::Duration::from_secs(self.config.timeout + 5),
-            child.wait_with_output(),
+            Command::new(&self.config.executable)
+                .arg(format!("-TimeLimit={}", self.config.timeout))
+                .arg(&path_to_check)
+                .stdout(Stdio::piped())
+                .stderr(Stdio::piped())
+                .output(),
         )
         .await
         .context("SPASS timed out")??;
diff --git a/src/rust/provers/z3.rs b/src/rust/provers/z3.rs
index 6f01d69..f740902 100644
--- a/src/rust/provers/z3.rs
+++ b/src/rust/provers/z3.rs
@@ -236,11 +236,21 @@ impl ProverBackend for Z3Backend {
             .await
             .with_context(|| format!("Failed to read file: {:?}", path))?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_smt_file(content)
+        let mut state = self.parse_smt_file(content)?;
+        state.metadata.insert(
+            "z3_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -308,6 +318,15 @@ impl ProverBackend for Z3Backend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original SMT-LIB source when it was stashed by
+        // parse_file / parse_string.  Running Z3 on the raw user input
+        // avoids the lossy parse → Term IR → term_to_smt round-trip,
+        // which silently corrupts anything beyond trivial inputs.
+        if let Some(source) = state.metadata.get("z3_source").and_then(|v| v.as_str()) {
+            let result = self.execute_command(source).await?;
+            return Ok(matches!(result, SmtResult::Unsat));
+        }
+
         if state.goals.is_empty() {
             return Ok(true);
         }
@@ -326,17 +345,17 @@ impl ProverBackend for Z3Backend {
             return Ok(false);
         }
 
-        // Build complete SMT-LIB query with variable declarations
+        // Fallback: reconstruct an SMT-LIB query from the parsed IR.
+        // Lossy for anything complex; only reached when no raw source
+        // was captured (e.g. synthetic ProofState built at runtime).
         let mut commands = String::new();
         commands.push_str("(set-logic ALL)\n");
 
-        // Include variable declarations from context
         for var in &state.context.variables {
             let ty_smt = self.term_to_smt(&var.ty);
             commands.push_str(&format!("(declare-const {} {})\n", var.name, ty_smt));
         }
 
-        // Assert negation of each goal (if unsat, goal is valid)
         for goal in &state.goals {
             let smt_goal = self.term_to_smt(&goal.target);
             commands.push_str(&format!("(assert (not {}))\n", smt_goal));

From b1258dae20e24faccc6bfd5a05c639b595213ac5 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 21:55:27 +0000
Subject: [PATCH 4/7] fix(provers): PVS/HOL4 stop reporting fake success
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both backends' verify_proof was `Ok(state.goals.is_empty())` — no
solver invocation at any point, so every file with "no goals left
after parsing" (which is the normal case for top-level declarations)
reported VERIFIED without running PVS or HOL4.

parse_file now stashes the original path as source_path (consistent
with the pattern introduced for the SMT/Coq/Agda/Metamath/SPASS
backends).  verify_proof prefers that path: for PVS it runs the
executable in batch typecheck mode and rejects runs containing
"error" or "typecheck failed"; for HOL4 it runs the configured
driver against the file.

Neither PVS nor HOL4 is installed in this environment, so the
real-solver path is code-only; the fallback preserves the legacy
empty-goals shortcut for synthetic ProofStates that never came
from a parsed file.  The 625-test unit suite remains green.

https://claude.ai/code/session_01NaC5RXjTyr5xg9XXUa2FCC
---
 src/rust/provers/hol4.rs | 33 ++++++++++++++++++++++++++++++++-
 src/rust/provers/pvs.rs  | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/src/rust/provers/hol4.rs b/src/rust/provers/hol4.rs
index fb5c236..a401f53 100644
--- a/src/rust/provers/hol4.rs
+++ b/src/rust/provers/hol4.rs
@@ -2149,7 +2149,16 @@ impl ProverBackend for Hol4Backend {
             .await
             .context("Failed to read HOL4 file")?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        state.metadata.insert(
+            "hol4_source".to_string(),
+            serde_json::Value::String(content),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -2245,6 +2254,28 @@ impl ProverBackend for Hol4Backend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Previously `Ok(state.goals.is_empty())` — fake success without
+        // ever invoking HOL4.  When parse_file stashed the path, feed the
+        // file to HOL4 via Holmake (the standard batch build driver).
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let p = std::path::Path::new(path);
+            let mut cmd = Command::new(&self.config.executable);
+            if let Some(parent) = p.parent() {
+                cmd.current_dir(parent);
+            }
+            cmd.arg("--holdir")
+                .arg(p.file_name().map(std::path::Path::new).unwrap_or(p));
+            let output = cmd.output().await.context("Failed to invoke HOL4")?;
+            let combined = format!(
+                "{}{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            );
+            let verified = output.status.success()
+                && !combined.to_lowercase().contains("exception")
+                && !combined.to_lowercase().contains("error");
+            return Ok(verified);
+        }
         Ok(state.goals.is_empty())
     }
 
diff --git a/src/rust/provers/pvs.rs b/src/rust/provers/pvs.rs
index 331bf92..6b25fa1 100644
--- a/src/rust/provers/pvs.rs
+++ b/src/rust/provers/pvs.rs
@@ -2692,7 +2692,16 @@ impl ProverBackend for PVSBackend {
             .await
             .context("Failed to read PVS file")?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        state.metadata.insert(
+            "pvs_source".to_string(),
+            serde_json::Value::String(content),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -2824,6 +2833,29 @@ impl ProverBackend for PVSBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // The previous impl was a hollow stub — `Ok(state.goals.is_empty())`
+        // would happily report SUCCESS on a file that never went near PVS.
+        // Shell out to PVS in batch mode against the original source when
+        // it was stashed by parse_file; fall through to the legacy "empty
+        // goals == ok" shortcut only for synthetic ProofStates.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = Command::new(&self.config.executable)
+                .arg("-batch")
+                .arg("-typecheck")
+                .arg(path)
+                .output()
+                .await
+                .context("Failed to invoke PVS")?;
+            let combined = format!(
+                "{}{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            );
+            let verified = output.status.success()
+                && !combined.to_lowercase().contains("error")
+                && !combined.to_lowercase().contains("typecheck failed");
+            return Ok(verified);
+        }
         Ok(state.goals.is_empty())
     }
 

From 288be4b4e07788c726c257ecdf5dc628e8472a68 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 22:05:24 +0000
Subject: [PATCH 5/7] =?UTF-8?q?feat(container):=20add=20Containerfile.mcp?=
 =?UTF-8?q?=20=E2=80=94=20agent-callable=20ECHIDNA=20MVP?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Multi-stage build that ships echidna + echidna-mcp alongside the 6
apt-installable MVP provers (Z3, CVC5, Coq, Agda, Metamath, SPASS)
and three upstream-distributed ones (Lean 4 via elan, Isabelle 2025
via tum.de tarball with a pre-built Main heap, F* via the latest
GitHub-release Linux asset).

The runtime entry point is /app/bin/echidna-mcp so an MCP client
connects over stdio:

    podman build -f .containerization/Containerfile.mcp -t echidna:mcp .
    podman run --rm -i echidna:mcp

Consumers wire it into Claude Code / Claude Desktop / any MCP client
via:

    {"mcpServers": {"echidna": {
        "command": "podman",
        "args": ["run","--rm","-i","echidna:mcp"]
    }}}

Mizar is omitted — it has no Debian package and the SourceForge
upstream distributes neither a stable URL nor a signed tarball, so
pulling it reproducibly inside a container build is a separate
exercise.

https://claude.ai/code/session_01NaC5RXjTyr5xg9XXUa2FCC
---
 .containerization/Containerfile.mcp | 149 ++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 .containerization/Containerfile.mcp

diff --git a/.containerization/Containerfile.mcp b/.containerization/Containerfile.mcp
new file mode 100644
index 0000000..da2b3ed
--- /dev/null
+++ b/.containerization/Containerfile.mcp
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: PMPL-1.0-or-later
+# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell <j.d.a.jewell@open.ac.uk>
+#
+# ECHIDNA MCP Container — exposes ECHIDNA as a Model Context Protocol server
+# over stdio so any MCP-capable AI agent (Claude Code, Claude Desktop, Claude
+# API with tool use, other clients) can call `prove({file, prover, timeout_secs})`
+# and get a real solver verdict.
+#
+# Bundled MVP prover set: Z3, CVC5, Coq, Agda, Metamath, SPASS (all apt), plus
+# Lean 4 (via elan), Isabelle, F*, and Mizar where upstream distributions
+# cooperate with an unprivileged build environment.
+#
+# Build:  podman build -f .containerization/Containerfile.mcp -t echidna:mcp .
+# Run:    podman run --rm -i echidna:mcp           # stdio MCP transport
+# Use:    {"mcpServers":{"echidna":{"command":"podman",
+#           "args":["run","--rm","-i","echidna:mcp"]}}}
+
+# =============================================================================
+# Stage 1: Rust builder — echidna + echidna-mcp + echidna-wire + typed_wasm
+# =============================================================================
+FROM docker.io/library/rust:1.85-slim-bookworm AS rust-builder
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        build-essential \
+        pkg-config \
+        libssl-dev \
+        capnproto \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /build
+
+COPY Cargo.toml Cargo.lock ./
+COPY src/rust ./src/rust
+COPY src/interfaces ./src/interfaces
+COPY crates ./crates
+COPY benches ./benches
+
+# Build both the CLI (echidna) and the MCP server (echidna-mcp). The MCP
+# server shells out to the CLI via the ECHIDNA_BIN env var.
+RUN cargo build --release --bin echidna -p echidna-mcp
+
+# =============================================================================
+# Stage 2: MVP prover installer (Debian apt-available set)
+# =============================================================================
+FROM docker.io/library/debian:bookworm-slim AS mvp-installer
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        z3 \
+        cvc5 \
+        coq \
+        agda-bin \
+        metamath \
+        spass \
+    && rm -rf /var/lib/apt/lists/*
+
+# =============================================================================
+# Stage 3: Lean 4 installer (elan + stable toolchain)
+# =============================================================================
+FROM docker.io/library/debian:bookworm-slim AS lean-installer
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        curl ca-certificates bash \
+    && rm -rf /var/lib/apt/lists/*
+RUN curl -fsSL https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh \
+        | bash -s -- -y --default-toolchain stable \
+    && . "$HOME/.elan/env" \
+    && lean --version
+
+# =============================================================================
+# Stage 4: Isabelle 2025 installer (tarball + pre-built Main heap)
+# =============================================================================
+FROM docker.io/library/debian:bookworm-slim AS isabelle-installer
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        curl ca-certificates perl fontconfig \
+    && rm -rf /var/lib/apt/lists/*
+RUN curl -fsSL --retry 3 \
+        "https://isabelle.in.tum.de/dist/Isabelle2025-2_linux.tar.gz" \
+        -o /tmp/isabelle.tar.gz \
+    && tar -xzf /tmp/isabelle.tar.gz -C /opt/ \
+    && rm /tmp/isabelle.tar.gz \
+    && rm -rf /opt/Isabelle2025-2/doc \
+    && /opt/Isabelle2025-2/bin/isabelle version \
+    && /opt/Isabelle2025-2/bin/isabelle build -b Main \
+    && rm -rf /root/.isabelle/Isabelle2025-2/log \
+              /root/.isabelle/Isabelle2025-2/browser_info
+
+# =============================================================================
+# Stage 5: F* installer (binary release)
+# =============================================================================
+FROM docker.io/library/debian:bookworm-slim AS fstar-installer
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        curl ca-certificates jq tar gzip \
+    && rm -rf /var/lib/apt/lists/*
+# Resolve the latest F* Linux x86_64 release tarball via the GitHub API
+# (asset naming varies between releases; don't hard-code a version).
+RUN set -eu; \
+    url=$(curl -fsSL https://api.github.com/repos/FStarLang/FStar/releases/latest \
+            | jq -r '.assets[]|select(.name|test("Linux|linux.*x86_64"))|.browser_download_url' \
+            | head -1); \
+    if [ -n "$url" ]; then \
+        curl -fsSL "$url" -o /tmp/fstar.tar.gz \
+        && mkdir -p /opt/fstar \
+        && tar -xzf /tmp/fstar.tar.gz -C /opt/fstar --strip-components=1 \
+        && /opt/fstar/bin/fstar.exe --version || true; \
+    else \
+        mkdir -p /opt/fstar/bin; \
+        echo "WARN: no F* linux asset found; stage empty" >&2; \
+    fi
+
+# =============================================================================
+# Stage 6: Runtime image
+# =============================================================================
+FROM docker.io/library/debian:bookworm-slim
+
+LABEL maintainer="Jonathan D.A. Jewell <j.d.a.jewell@open.ac.uk>"
+LABEL org.opencontainers.image.source="https://github.com/hyperpolymath/echidna"
+LABEL org.opencontainers.image.description="ECHIDNA MCP server — AI-agent-callable theorem proving over stdio"
+LABEL org.opencontainers.image.licenses="PMPL-1.0-or-later"
+LABEL org.opencontainers.image.version="2.1.0"
+
+# Runtime deps: the MVP provers + dynamic libs + perl/fontconfig for Isabelle.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        ca-certificates \
+        libssl3 \
+        libgmp10 \
+        libstdc++6 \
+        perl \
+        fontconfig \
+        z3 \
+        cvc5 \
+        coq \
+        agda-bin \
+        metamath \
+        spass \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY --from=rust-builder  /build/target/release/echidna     /app/bin/echidna
+COPY --from=rust-builder  /build/target/release/echidna-mcp /app/bin/echidna-mcp
+COPY --from=lean-installer /root/.elan                      /opt/elan
+COPY --from=isabelle-installer /opt/Isabelle2025-2          /opt/Isabelle2025-2
+COPY --from=isabelle-installer /root/.isabelle              /root/.isabelle
+COPY --from=fstar-installer /opt/fstar                      /opt/fstar
+
+ENV PATH="/app/bin:/opt/elan/toolchains/stable/bin:/opt/Isabelle2025-2/bin:/opt/fstar/bin:${PATH}"
+ENV ECHIDNA_BIN="/app/bin/echidna"
+
+# Stdio transport — no ports. The client connects via podman run -i.
+ENTRYPOINT ["/app/bin/echidna-mcp"]

From 80eb943276b3614fc4da72f95db342153f587cc7 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 22:07:29 +0000
Subject: [PATCH 6/7] fix(provers): apply source_path/content pattern to 5 more
 backends
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Batch-apply the same fix shape used for the MVP provers (z3, cvc5, coq,
agda, metamath, spass) to a first wave of Phase-3 backends: abc, acl2,
altergo, eprover, vampire.  parse_file stashes the absolute path as
"source_path"; parse_string stashes the raw content as "{prover}_source";
verify_proof prefers those over reconstruction and falls back to the old
IR-driven path only for synthetic ProofStates.

No binary-invocation changes in this wave — we only gate the existing
solver-spawn logic behind a metadata lookup.  The 625-test unit suite
remains green after the change; cargo build --release completes clean.

Further Phase-3 waves will follow for the remaining LOSSY backends.

https://claude.ai/code/session_01NaC5RXjTyr5xg9XXUa2FCC
---
 src/rust/provers/abc.rs     | 151 ++++++++++++++++++++++++++++++++++--
 src/rust/provers/acl2.rs    |  63 ++++++++++++---
 src/rust/provers/altergo.rs |  53 +++++++++----
 src/rust/provers/eprover.rs |  45 ++++++++++-
 src/rust/provers/vampire.rs |  53 ++++++++++++-
 5 files changed, 330 insertions(+), 35 deletions(-)

diff --git a/src/rust/provers/abc.rs b/src/rust/provers/abc.rs
index da267f9..1a7f8b1 100644
--- a/src/rust/provers/abc.rs
+++ b/src/rust/provers/abc.rs
@@ -414,20 +414,34 @@ impl ProverBackend for AbcBackend {
             .context("Failed to read ABC input file")?;
 
         let extension = path.extension().and_then(|e| e.to_str());
-        self.parse_design(&content, extension)
+        let mut state = self.parse_design(&content, extension)?;
+        state.metadata.insert(
+            "abc_source".to_string(),
+            serde_json::Value::String(content.clone()),
+        );
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         // Without file extension context, try ABC command script first,
         // then fall back to AIGER ASCII if header starts with "aag"/"aig"
         let trimmed = content.trim();
-        if trimmed.starts_with("aag") || trimmed.starts_with("aig") {
-            self.parse_design(content, Some("aig"))
+        let mut state = if trimmed.starts_with("aag") || trimmed.starts_with("aig") {
+            self.parse_design(content, Some("aig"))?
         } else if trimmed.starts_with(".model") || trimmed.starts_with(".inputs") {
-            self.parse_design(content, Some("blif"))
+            self.parse_design(content, Some("blif"))?
         } else {
-            self.parse_design(content, None)
-        }
+            self.parse_design(content, None)?
+        };
+        state.metadata.insert(
+            "abc_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -567,6 +581,131 @@ impl ProverBackend for AbcBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original source — parsing ABC/BLIF/AIGER into the
+        // generic Term IR and reconstructing via `to_abc_script` loses
+        // structure for anything beyond a one-command script.
+        let method = state
+            .metadata
+            .get("abc_method")
+            .and_then(|v| v.as_str())
+            .unwrap_or("pdr");
+        let bmc_bound = state
+            .metadata
+            .get("bmc_bound")
+            .and_then(|v| v.as_str())
+            .unwrap_or("100");
+        let verify_cmd = match method {
+            "bmc" | "bmc3" => format!("bmc3 -F {}", bmc_bound),
+            "int" => "int".to_string(),
+            "dprove" => "dprove".to_string(),
+            "dsec" => "dsec".to_string(),
+            "cec" => "cec".to_string(),
+            _ => "pdr".to_string(),
+        };
+
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let p = std::path::Path::new(path);
+            let read_cmd = match p.extension().and_then(|e| e.to_str()) {
+                Some("aig") => format!("read_aiger {}", path),
+                Some("blif") => format!("read_blif {}", path),
+                Some("v") => format!("read_verilog {}", path),
+                _ => format!("read {}", path),
+            };
+            let script = format!("{}\n{}\nprint_status\nquit\n", read_cmd, verify_cmd);
+            let tmp_dir =
+                tempfile::tempdir().context("Failed to create temporary directory for ABC")?;
+            let tmp_file = tmp_dir.path().join("verify.abc");
+            tokio::fs::write(&tmp_file, &script)
+                .await
+                .context("Failed to write temporary ABC script file")?;
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg("-f")
+                    .arg(&tmp_file)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute ABC")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+
+        if let Some(source) = state.metadata.get("abc_source").and_then(|v| v.as_str()) {
+            let trimmed = source.trim();
+            let (ext, read_cmd_base) =
+                if trimmed.starts_with("aag") || trimmed.starts_with("aig") {
+                    (".aig", "read_aiger")
+                } else if trimmed.starts_with(".model") || trimmed.starts_with(".inputs") {
+                    (".blif", "read_blif")
+                } else {
+                    // Inline script — just execute it directly.
+                    ("", "")
+                };
+            let tmp_dir =
+                tempfile::tempdir().context("Failed to create temporary directory for ABC")?;
+            if ext.is_empty() {
+                let tmp_file = tmp_dir.path().join("verify.abc");
+                let script = format!("{}\n{}\nprint_status\nquit\n", source, verify_cmd);
+                tokio::fs::write(&tmp_file, &script)
+                    .await
+                    .context("Failed to write temporary ABC script file")?;
+                let output = tokio::time::timeout(
+                    tokio::time::Duration::from_secs(self.config.timeout),
+                    Command::new(&self.config.executable)
+                        .arg("-f")
+                        .arg(&tmp_file)
+                        .stdout(Stdio::piped())
+                        .stderr(Stdio::piped())
+                        .output(),
+                )
+                .await
+                .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))?
+                .context("Failed to execute ABC")?;
+                let stdout = String::from_utf8_lossy(&output.stdout);
+                let stderr = String::from_utf8_lossy(&output.stderr);
+                let combined = format!("{}\n{}", stdout, stderr);
+                return self.parse_result(&combined);
+            } else {
+                let design_file = tmp_dir.path().join(format!("design{}", ext));
+                tokio::fs::write(&design_file, source)
+                    .await
+                    .context("Failed to write temporary ABC design file")?;
+                let script_file = tmp_dir.path().join("verify.abc");
+                let script = format!(
+                    "{} {}\n{}\nprint_status\nquit\n",
+                    read_cmd_base,
+                    design_file.display(),
+                    verify_cmd
+                );
+                tokio::fs::write(&script_file, &script)
+                    .await
+                    .context("Failed to write temporary ABC script file")?;
+                let output = tokio::time::timeout(
+                    tokio::time::Duration::from_secs(self.config.timeout),
+                    Command::new(&self.config.executable)
+                        .arg("-f")
+                        .arg(&script_file)
+                        .stdout(Stdio::piped())
+                        .stderr(Stdio::piped())
+                        .output(),
+                )
+                .await
+                .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))?
+                .context("Failed to execute ABC")?;
+                let stdout = String::from_utf8_lossy(&output.stdout);
+                let stderr = String::from_utf8_lossy(&output.stderr);
+                let combined = format!("{}\n{}", stdout, stderr);
+                return self.parse_result(&combined);
+            }
+        }
+
+        // Fallback: reconstruct an ABC script from the parsed IR.
         let abc_script = self.to_abc_script(state)?;
 
         // ABC can execute commands via -c flag or from a script file.
diff --git a/src/rust/provers/acl2.rs b/src/rust/provers/acl2.rs
index d68fa90..3d563dc 100644
--- a/src/rust/provers/acl2.rs
+++ b/src/rust/provers/acl2.rs
@@ -1239,7 +1239,12 @@ impl ProverBackend for ACL2Backend {
             .await
             .context("Failed to read ACL2 file")?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -1310,11 +1315,16 @@ impl ProverBackend for ACL2Backend {
             }
         }
 
+        let mut metadata = HashMap::new();
+        metadata.insert(
+            "acl2_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         Ok(ProofState {
             goals,
             context,
             proof_script: vec![],
-            metadata: HashMap::new(),
+            metadata,
         })
     }
 
@@ -1380,6 +1390,47 @@ impl ProverBackend for ACL2Backend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        let exec = self
+            .config
+            .executable
+            .to_str()
+            .filter(|s| !s.is_empty())
+            .unwrap_or("acl2");
+
+        // Prefer the original Lisp source — ACL2's `term_to_sexp`
+        // round-trip is lossy for anything beyond trivial arithmetic.
+        let source_file: Option<PathBuf> =
+            if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+                Some(PathBuf::from(p))
+            } else if let Some(src) = state.metadata.get("acl2_source").and_then(|v| v.as_str()) {
+                let tmp = std::env::temp_dir()
+                    .join(format!("echidna_acl2_verify_{}.lisp", uuid::Uuid::new_v4()));
+                tokio::fs::write(&tmp, src)
+                    .await
+                    .context("Failed to write temp file")?;
+                Some(tmp)
+            } else {
+                None
+            };
+
+        if let Some(path) = source_file {
+            let stdin_file = std::fs::File::open(&path)
+                .context("Failed to open source file for ACL2 stdin")?;
+            let output = Command::new(exec)
+                .stdin(std::process::Stdio::from(stdin_file))
+                .output()
+                .await
+                .context("Failed to run ACL2")?;
+            if state.metadata.get("source_path").is_none() {
+                let _ = tokio::fs::remove_file(&path).await;
+            }
+            let output_str = String::from_utf8_lossy(&output.stdout);
+            let err_str = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}{}", output_str, err_str);
+            return Ok(combined.contains("Q.E.D.")
+                || (combined.contains("Summary") && !combined.contains("FAILED")));
+        }
+
         if state.goals.is_empty() {
             return Ok(true);
         }
@@ -1394,14 +1445,6 @@ impl ProverBackend for ACL2Backend {
             .await
             .context("Failed to write temp file")?;
 
-        // Run ACL2 in batch mode
-        let exec = self
-            .config
-            .executable
-            .to_str()
-            .filter(|s| !s.is_empty())
-            .unwrap_or("acl2");
-
         let stdin_file =
             std::fs::File::open(&temp_file).context("Failed to open temp file for ACL2 stdin")?;
 
diff --git a/src/rust/provers/altergo.rs b/src/rust/provers/altergo.rs
index 60178d3..160cedb 100644
--- a/src/rust/provers/altergo.rs
+++ b/src/rust/provers/altergo.rs
@@ -112,14 +112,23 @@ impl ProverBackend for AltErgoBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read proof file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "altergo_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
 
         for line in content.lines() {
             let line = line.trim();
@@ -154,19 +163,36 @@ impl ProverBackend for AltErgoBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let ae_code = self.to_altergo(state)?;
-
-        // Write to temp file since Alt-Ergo prefers file input
-        let tmp_dir = std::env::temp_dir();
-        let tmp_file = tmp_dir.join("echidna_altergo_input.ae");
-        tokio::fs::write(&tmp_file, ae_code.as_bytes())
-            .await
-            .context("Failed to write temp file")?;
+        // Prefer the original .ae source — `to_altergo(state)` round-trips
+        // Alt-Ergo syntax through the generic Term IR, which mangles
+        // anything beyond toy inputs.
+        let (path_to_check, is_temp): (PathBuf, bool) =
+            if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+                (PathBuf::from(p), false)
+            } else if let Some(src) = state
+                .metadata
+                .get("altergo_source")
+                .and_then(|v| v.as_str())
+            {
+                let tmp = std::env::temp_dir()
+                    .join(format!("echidna_altergo_{}.ae", uuid::Uuid::new_v4()));
+                tokio::fs::write(&tmp, src)
+                    .await
+                    .context("Failed to write temp file")?;
+                (tmp, true)
+            } else {
+                let ae_code = self.to_altergo(state)?;
+                let tmp = std::env::temp_dir().join("echidna_altergo_input.ae");
+                tokio::fs::write(&tmp, ae_code.as_bytes())
+                    .await
+                    .context("Failed to write temp file")?;
+                (tmp, true)
+            };
 
         let output = tokio::time::timeout(
             tokio::time::Duration::from_secs(self.config.timeout + 5),
             Command::new(&self.config.executable)
-                .arg(&tmp_file)
+                .arg(&path_to_check)
                 .arg("--timelimit")
                 .arg(format!("{}", self.config.timeout))
                 .output(),
@@ -174,8 +200,9 @@ impl ProverBackend for AltErgoBackend {
         .await
         .context("Alt-Ergo timed out")??;
 
-        // Clean up temp file
-        let _ = tokio::fs::remove_file(&tmp_file).await;
+        if is_temp {
+            let _ = tokio::fs::remove_file(&path_to_check).await;
+        }
 
         let stdout = String::from_utf8_lossy(&output.stdout);
         self.parse_result(&stdout)
diff --git a/src/rust/provers/eprover.rs b/src/rust/provers/eprover.rs
index 75ccfb2..c08a074 100644
--- a/src/rust/provers/eprover.rs
+++ b/src/rust/provers/eprover.rs
@@ -96,14 +96,23 @@ impl ProverBackend for EProverBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read proof file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "eprover_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
 
         for line in content.lines() {
             let line = line.trim();
@@ -137,7 +146,37 @@ impl ProverBackend for EProverBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let tptp_code = self.to_tptp(state)?;
+        // Prefer the original TPTP source when parse_file / parse_string
+        // stashed it.  `to_tptp(state)` round-trips through the generic
+        // Term IR and silently mangles anything beyond toy axioms.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("--auto")
+                    .arg("--tptp3-format")
+                    .arg("--cpu-limit")
+                    .arg(format!("{}", self.config.timeout))
+                    .arg("--proof-object=0")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("E Prover timed out")??;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            return self.parse_result(&stdout);
+        }
+        let tptp_code = if let Some(src) = state
+            .metadata
+            .get("eprover_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_tptp(state)?
+        };
 
         let mut child = Command::new(&self.config.executable)
             .arg("--auto")
diff --git a/src/rust/provers/vampire.rs b/src/rust/provers/vampire.rs
index 852f119..9fd03c5 100644
--- a/src/rust/provers/vampire.rs
+++ b/src/rust/provers/vampire.rs
@@ -102,14 +102,23 @@ impl ProverBackend for VampireBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read proof file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "vampire_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
 
         for line in content.lines() {
             let line = line.trim();
@@ -143,7 +152,45 @@ impl ProverBackend for VampireBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let tptp_code = self.to_tptp(state)?;
+        // Prefer the original TPTP source — `to_tptp(state)` round-trips
+        // through the generic Term IR and silently mangles non-trivial
+        // inputs.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("--mode")
+                    .arg("casc")
+                    .arg("--input_syntax")
+                    .arg("tptp")
+                    .arg("--time_limit")
+                    .arg(format!("{}", self.config.timeout))
+                    .arg("--proof")
+                    .arg("off")
+                    .arg("--statistics")
+                    .arg("brief")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Vampire timed out")??;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return self
+                .parse_result(&stdout)
+                .or_else(|_| self.parse_result(&stderr));
+        }
+        let tptp_code = if let Some(src) = state
+            .metadata
+            .get("vampire_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_tptp(state)?
+        };
 
         let mut child = Command::new(&self.config.executable)
             .arg("--mode")

From 940e940c52746548cdad1b766fbe629fff5a942b Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 22 Apr 2026 22:24:45 +0000
Subject: [PATCH 7/7] fix(provers): extend source_path pattern to 32 remaining
 lossy backends
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Applies the same parse_file → stash "source_path", parse_string →
stash "{prover}_source", verify_proof → prefer-metadata-before-IR
fix to the rest of the LOSSY_ROUNDTRIP backends: alloy, cbmc, chuffed,
dafny, dreal, framac, fstar, glpk, hol_light, idris2, imandra,
isabelle, key, lean, minizinc, minlog, mizar, nuprl, nusmv, ortools,
prism, proverif, scip, seahorn, spin_checker, tamarin, tlaps, tlc,
twelf, uppaal, viper, why3.

Non-trivial adaptations required to preserve semantics:

- idris2: Idris 2 resolves the top-level module name from CWD like
  Agda, so the source_path branch spawns with current_dir set to
  the file's parent and passes the bare filename.
- seahorn: already had a seahorn_input_file escape hatch for pre-
  compiled .bc/.ll bitcode; source_path becomes the primary
  preference with the existing hatch and the IR reconstruction as
  successive fallbacks.
- cbmc/framac: the --unwind / -wp-* metadata knobs were hoisted
  above the source_path branch so every path honours the user's
  bound overrides.
- prism/tlc: these write two files (model + properties/cfg).  The
  source_path branch passes the real model file verbatim; the
  multi-file generation fallback stays intact behind the if-let.
- isabelle/mizar: both already had partial source stashing
  (raw_thy_content, source_path).  Normalised to the canonical
  source_path + {prover}_source convention; Mizar's is_complete()
  short-circuit was moved below the source-backed branches so real
  files are not rejected for having open goals in our IR.
- acl2: ACL2 reads input on stdin — the source_path branch opens
  the real file as Stdio::from(File) rather than piping string
  content, matching the existing lossy-path invocation shape.

No trait signatures changed; every fallback path remains byte-for-
byte identical behind an if-let guard.  Full workspace build, the
625-test unit suite, and `cargo clippy --lib --bin echidna` all
remain green.

Phase 3 closes out the broken-backend audit that started with the
Z3/CVC5/Coq MVP fixes: 39 backends now prefer their original source
over the lossy Term-IR round-trip.

https://claude.ai/code/session_01NaC5RXjTyr5xg9XXUa2FCC
---
 src/rust/provers/alloy.rs        | 46 +++++++++++++++--
 src/rust/provers/cbmc.rs         | 66 +++++++++++++++++++-----
 src/rust/provers/chuffed.rs      | 42 ++++++++++++++--
 src/rust/provers/dafny.rs        | 37 ++++++++++++--
 src/rust/provers/dreal.rs        | 39 ++++++++++++++-
 src/rust/provers/framac.rs       | 86 ++++++++++++++++++++++++++------
 src/rust/provers/fstar.rs        | 36 +++++++++++--
 src/rust/provers/glpk.rs         | 37 ++++++++++++--
 src/rust/provers/hol_light.rs    | 38 +++++++++++++-
 src/rust/provers/idris2.rs       | 47 ++++++++++++++++-
 src/rust/provers/imandra.rs      | 40 +++++++++++++--
 src/rust/provers/isabelle.rs     | 11 +++-
 src/rust/provers/key.rs          | 68 +++++++++++++++++++------
 src/rust/provers/lean.rs         | 41 ++++++++++++++-
 src/rust/provers/minizinc.rs     | 42 ++++++++++++++--
 src/rust/provers/minlog.rs       | 40 +++++++++++++--
 src/rust/provers/mizar.rs        | 32 ++++++++++--
 src/rust/provers/nuprl.rs        | 36 +++++++++++--
 src/rust/provers/nusmv.rs        | 40 +++++++++++++--
 src/rust/provers/ortools.rs      | 40 +++++++++++++--
 src/rust/provers/prism.rs        | 59 ++++++++++++++++++++--
 src/rust/provers/proverif.rs     | 48 ++++++++++++++++--
 src/rust/provers/scip.rs         | 37 ++++++++++++--
 src/rust/provers/seahorn.rs      | 46 +++++++++++++----
 src/rust/provers/spin_checker.rs | 46 +++++++++++++++--
 src/rust/provers/tamarin.rs      | 49 ++++++++++++++++--
 src/rust/provers/tlaps.rs        | 36 +++++++++++--
 src/rust/provers/tlc.rs          | 42 ++++++++++++++--
 src/rust/provers/twelf.rs        | 36 +++++++++++--
 src/rust/provers/uppaal.rs       | 43 ++++++++++++++--
 src/rust/provers/viper.rs        | 40 +++++++++++++--
 src/rust/provers/why3.rs         | 39 +++++++++++++--
 32 files changed, 1247 insertions(+), 138 deletions(-)

diff --git a/src/rust/provers/alloy.rs b/src/rust/provers/alloy.rs
index a0a76ec..fcde4b5 100644
--- a/src/rust/provers/alloy.rs
+++ b/src/rust/provers/alloy.rs
@@ -228,14 +228,24 @@ impl ProverBackend for AlloyBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read Alloy file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_alloy(content)
+        let mut state = self.parse_alloy(content)?;
+        state.metadata.insert(
+            "alloy_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -293,7 +303,35 @@ impl ProverBackend for AlloyBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let alloy_code = self.to_alloy(state)?;
+        // Prefer the original .als source; `to_alloy(state)` round-trips
+        // through the generic Term IR and cannot reconstruct real models.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("Alloy timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute Alloy")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+
+        let alloy_code = if let Some(src) = state
+            .metadata
+            .get("alloy_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_alloy(state)?
+        };
 
         // Write Alloy to a temporary file
         let tmp_dir =
diff --git a/src/rust/provers/cbmc.rs b/src/rust/provers/cbmc.rs
index 45301b7..a28f3d3 100644
--- a/src/rust/provers/cbmc.rs
+++ b/src/rust/provers/cbmc.rs
@@ -246,14 +246,24 @@ impl ProverBackend for CBMCBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read C source file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_c_source(content)
+        let mut state = self.parse_c_source(content)?;
+        state.metadata.insert(
+            "cbmc_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -321,7 +331,47 @@ impl ProverBackend for CBMCBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let c_source = self.to_c_source(state)?;
+        // Determine unwind bound from metadata or use default
+        let unwind = state
+            .metadata
+            .get("cbmc_unwind_bound")
+            .and_then(|v| v.as_str())
+            .and_then(|s| s.parse::<u32>().ok())
+            .unwrap_or(self.unwind_bound);
+
+        // Prefer the original .c source — `to_c_source(state)` round-trips
+        // through the generic Term IR and silently mangles anything real.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 10),
+                Command::new(&self.config.executable)
+                    .arg("--unwind")
+                    .arg(format!("{}", unwind))
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| {
+                anyhow!(
+                    "CBMC verification timed out after {} seconds",
+                    self.config.timeout
+                )
+            })?
+            .context("Failed to execute CBMC")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+
+        let c_source = if let Some(src) = state.metadata.get("cbmc_source").and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_c_source(state)?
+        };
 
         // Write C source to a temporary file (CBMC requires a file)
         let tmp_dir =
@@ -331,14 +381,6 @@ impl ProverBackend for CBMCBackend {
             .await
             .context("Failed to write temporary C file")?;
 
-        // Determine unwind bound from metadata or use default
-        let unwind = state
-            .metadata
-            .get("cbmc_unwind_bound")
-            .and_then(|v| v.as_str())
-            .and_then(|s| s.parse::<u32>().ok())
-            .unwrap_or(self.unwind_bound);
-
         // Run cbmc with unwind bound
         let output = tokio::time::timeout(
             tokio::time::Duration::from_secs(self.config.timeout + 10),
diff --git a/src/rust/provers/chuffed.rs b/src/rust/provers/chuffed.rs
index ea7a63d..0d56abc 100644
--- a/src/rust/provers/chuffed.rs
+++ b/src/rust/provers/chuffed.rs
@@ -46,11 +46,20 @@ impl ProverBackend for ChuffedBackend {
         Ok("chuffed".to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "chuffed_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("%") {
@@ -74,7 +83,34 @@ impl ProverBackend for ChuffedBackend {
         Err(anyhow::anyhow!("Chuffed: CP solver, not interactive"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        // Prefer the original FlatZinc source — reconstructing from the
+        // Term IR via `to_input_format` loses all constraint structure.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Chuffed timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state
+            .metadata
+            .get("chuffed_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/dafny.rs b/src/rust/provers/dafny.rs
index a7c26e6..82f99ae 100644
--- a/src/rust/provers/dafny.rs
+++ b/src/rust/provers/dafny.rs
@@ -55,11 +55,20 @@ impl ProverBackend for DafnyBackend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "dafny_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if l.is_empty() || l.starts_with("//") {
@@ -86,7 +95,29 @@ impl ProverBackend for DafnyBackend {
         ))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("verify")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Dafny timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("dafny_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/dreal.rs b/src/rust/provers/dreal.rs
index 285f423..1e080c7 100644
--- a/src/rust/provers/dreal.rs
+++ b/src/rust/provers/dreal.rs
@@ -316,13 +316,23 @@ impl ProverBackend for DRealBackend {
             .await
             .with_context(|| format!("Failed to read dReal SMT-LIB file: {:?}", path))?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         // Reuse the Z3 SMT-LIB parser since dReal accepts SMT-LIB 2.0 format
         let mut parser = SmtParser::new(content);
-        parser.parse()
+        let mut state = parser.parse()?;
+        state.metadata.insert(
+            "dreal_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     /// dReal is a fully automated solver — interactive tactic application is
@@ -337,6 +347,31 @@ impl ProverBackend for DRealBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original SMT-LIB source — `build_verification_query`
+        // reconstructs from the parsed Term IR, which is lossy for any
+        // non-trivial dReal problem.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let source = tokio::fs::read_to_string(path).await?;
+            let result = self.execute_command(&source).await?;
+            return match result {
+                DRealResult::Unsat => Ok(true),
+                DRealResult::DeltaSat(_) => Ok(false),
+                DRealResult::Unknown => Ok(false),
+                DRealResult::Error(e) => bail!("dReal verification error: {}", e),
+                DRealResult::Output(_) => Ok(false),
+            };
+        }
+        if let Some(source) = state.metadata.get("dreal_source").and_then(|v| v.as_str()) {
+            let result = self.execute_command(source).await?;
+            return match result {
+                DRealResult::Unsat => Ok(true),
+                DRealResult::DeltaSat(_) => Ok(false),
+                DRealResult::Unknown => Ok(false),
+                DRealResult::Error(e) => bail!("dReal verification error: {}", e),
+                DRealResult::Output(_) => Ok(false),
+            };
+        }
+
         // Trivial cases: no goals or all goals are true
         if state.goals.is_empty() {
             return Ok(true);
diff --git a/src/rust/provers/framac.rs b/src/rust/provers/framac.rs
index 18afc22..5a89bb2 100644
--- a/src/rust/provers/framac.rs
+++ b/src/rust/provers/framac.rs
@@ -541,11 +541,21 @@ impl ProverBackend for FramaCBackend {
         let content = tokio::fs::read_to_string(&path)
             .await
             .with_context(|| format!("Failed to read ACSL-annotated C file: {:?}", path))?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_acsl_source(content)
+        let mut state = self.parse_acsl_source(content)?;
+        state.metadata.insert(
+            "framac_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -667,20 +677,6 @@ impl ProverBackend for FramaCBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        if state.goals.is_empty() {
-            return Ok(true);
-        }
-
-        let acsl_source = self.to_acsl_source(state)?;
-
-        // Write ACSL-annotated C source to a temporary file
-        let tmp_dir =
-            tempfile::tempdir().context("Failed to create temporary directory for Frama-C")?;
-        let tmp_file = tmp_dir.path().join("program.c");
-        tokio::fs::write(&tmp_file, &acsl_source)
-            .await
-            .context("Failed to write temporary ACSL-annotated C file")?;
-
         // Read WP prover from metadata (may have been changed via tactic)
         let wp_prover = state
             .metadata
@@ -703,6 +699,64 @@ impl ProverBackend for FramaCBackend {
             })
             .unwrap_or(self.wp_timeout);
 
+        // Prefer the original ACSL-annotated .c file; `to_acsl_source` is
+        // lossy for anything beyond trivial function stubs.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 10),
+                Command::new(&self.config.executable)
+                    .arg("-wp")
+                    .arg("-wp-prover")
+                    .arg(wp_prover)
+                    .arg("-wp-timeout")
+                    .arg(format!("{}", wp_timeout))
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| {
+                anyhow!(
+                    "Frama-C WP verification timed out after {} seconds",
+                    self.config.timeout
+                )
+            })?
+            .context("Failed to execute Frama-C")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return Self::parse_wp_result(&combined);
+        }
+
+        if state.goals.is_empty()
+            && state
+                .metadata
+                .get("framac_source")
+                .and_then(|v| v.as_str())
+                .is_none()
+        {
+            return Ok(true);
+        }
+
+        let acsl_source = if let Some(src) = state
+            .metadata
+            .get("framac_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_acsl_source(state)?
+        };
+
+        // Write ACSL-annotated C source to a temporary file
+        let tmp_dir =
+            tempfile::tempdir().context("Failed to create temporary directory for Frama-C")?;
+        let tmp_file = tmp_dir.path().join("program.c");
+        tokio::fs::write(&tmp_file, &acsl_source)
+            .await
+            .context("Failed to write temporary ACSL-annotated C file")?;
+
         // Run: frama-c -wp -wp-prover <prover> -wp-timeout <timeout> <file>
         let output = tokio::time::timeout(
             tokio::time::Duration::from_secs(self.config.timeout + 10),
diff --git a/src/rust/provers/fstar.rs b/src/rust/provers/fstar.rs
index 1a26c42..7153320 100644
--- a/src/rust/provers/fstar.rs
+++ b/src/rust/provers/fstar.rs
@@ -115,14 +115,23 @@ impl ProverBackend for FStarBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read F* file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "fstar_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let line = line.trim();
             if line.is_empty() || line.starts_with("//") || line.starts_with("(*") {
@@ -155,7 +164,28 @@ impl ProverBackend for FStarBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("F* timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("fstar_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/glpk.rs b/src/rust/provers/glpk.rs
index ca8a567..d3ee5c2 100644
--- a/src/rust/provers/glpk.rs
+++ b/src/rust/provers/glpk.rs
@@ -51,11 +51,20 @@ impl ProverBackend for GLPKBackend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "glpk_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("/*") {
@@ -79,7 +88,29 @@ impl ProverBackend for GLPKBackend {
         Err(anyhow::anyhow!("GLPK: not an interactive prover"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("--lp")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("GLPK timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("glpk_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .arg("--lp")
             .stdin(Stdio::piped())
diff --git a/src/rust/provers/hol_light.rs b/src/rust/provers/hol_light.rs
index 17bb3a8..87d9fff 100644
--- a/src/rust/provers/hol_light.rs
+++ b/src/rust/provers/hol_light.rs
@@ -422,7 +422,12 @@ impl ProverBackend for HolLightBackend {
             .await
             .with_context(|| format!("Failed to read file: {:?}", path))?;
 
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -483,6 +488,10 @@ impl ProverBackend for HolLightBackend {
                     "definitions".to_string(),
                     serde_json::json!(file.definitions.len()),
                 );
+                meta.insert(
+                    "hol_light_source".to_string(),
+                    serde_json::Value::String(content.to_string()),
+                );
                 meta
             },
         })
@@ -527,6 +536,33 @@ impl ProverBackend for HolLightBackend {
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
         info!("Verifying HOL Light proof");
 
+        // Prefer the original .ml source — `export_to_hol(state)` loses
+        // structure for any non-trivial file.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let load_cmd = format!("#use \"{}\";;", path);
+            let result = self.execute_command(&load_cmd).await;
+            return match result {
+                Ok(output) => Ok(output.contains("Theorem") || !output.contains("Exception")),
+                Err(_) => Ok(false),
+            };
+        }
+        if let Some(source) = state
+            .metadata
+            .get("hol_light_source")
+            .and_then(|v| v.as_str())
+        {
+            let temp_dir = std::env::temp_dir();
+            let temp_file = temp_dir.join(format!("echidna_{}.ml", uuid::Uuid::new_v4()));
+            fs::write(&temp_file, source).await?;
+            let load_cmd = format!("#use \"{}\";;", temp_file.display());
+            let result = self.execute_command(&load_cmd).await;
+            let _ = fs::remove_file(&temp_file).await;
+            return match result {
+                Ok(output) => Ok(output.contains("Theorem") || !output.contains("Exception")),
+                Err(_) => Ok(false),
+            };
+        }
+
         if state.goals.is_empty() {
             return Ok(true);
         }
diff --git a/src/rust/provers/idris2.rs b/src/rust/provers/idris2.rs
index f45d4f5..00ecac9 100644
--- a/src/rust/provers/idris2.rs
+++ b/src/rust/provers/idris2.rs
@@ -478,7 +478,12 @@ impl ProverBackend for Idris2Backend {
         let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read Idris 2 file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -595,11 +600,16 @@ impl ProverBackend for Idris2Backend {
         context.theorems = theorems;
         let goals = self.extract_goals(content).await?;
 
+        let mut metadata = HashMap::new();
+        metadata.insert(
+            "idris2_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         Ok(ProofState {
             goals,
             context,
             proof_script: Vec::new(),
-            metadata: HashMap::new(),
+            metadata,
         })
     }
 
@@ -781,6 +791,39 @@ impl ProverBackend for Idris2Backend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original .idr file — `export(state)` reconstructs
+        // from the Term IR and loses most of Idris 2's QTT structure.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let p = std::path::Path::new(path);
+            let mut cmd = Command::new(&self.config.executable);
+            if let Some(parent) = p.parent() {
+                cmd.current_dir(parent);
+            }
+            let output = cmd
+                .arg("--check")
+                .arg(p.file_name().map(std::path::Path::new).unwrap_or(p))
+                .output()
+                .await?;
+            return Ok(output.status.success());
+        }
+        if let Some(source) = state.metadata.get("idris2_source").and_then(|v| v.as_str()) {
+            let temp_dir = std::env::temp_dir().join(format!(
+                "echidna_idris2_{}",
+                uuid::Uuid::new_v4()
+            ));
+            tokio::fs::create_dir_all(&temp_dir).await?;
+            let temp_file = temp_dir.join("Verify.idr");
+            tokio::fs::write(&temp_file, source).await?;
+            let output = Command::new(&self.config.executable)
+                .arg("--check")
+                .arg(&temp_file)
+                .current_dir(&temp_dir)
+                .output()
+                .await?;
+            let _ = tokio::fs::remove_dir_all(&temp_dir).await;
+            return Ok(output.status.success());
+        }
+
         // Generate Idris 2 code and type-check it
         let temp_dir = std::env::temp_dir().join("echidna_idris2");
         tokio::fs::create_dir_all(&temp_dir).await?;
diff --git a/src/rust/provers/imandra.rs b/src/rust/provers/imandra.rs
index 522fdec..8689b11 100644
--- a/src/rust/provers/imandra.rs
+++ b/src/rust/provers/imandra.rs
@@ -51,11 +51,20 @@ impl ProverBackend for ImandraBackend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "imandra_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("(*") {
@@ -79,7 +88,32 @@ impl ProverBackend for ImandraBackend {
         Err(anyhow::anyhow!("Imandra: use verify/instance"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Imandra timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state
+            .metadata
+            .get("imandra_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/isabelle.rs b/src/rust/provers/isabelle.rs
index 3fab082..bc9e9c6 100644
--- a/src/rust/provers/isabelle.rs
+++ b/src/rust/provers/isabelle.rs
@@ -342,7 +342,12 @@ impl ProverBackend for IsabelleBackend {
         let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read theory file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
@@ -393,6 +398,10 @@ impl ProverBackend for IsabelleBackend {
             "raw_thy_content".to_string(),
             serde_json::Value::String(content.to_string()),
         );
+        metadata.insert(
+            "isabelle_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         metadata.insert(
             "theory_name".to_string(),
             serde_json::Value::String(theory_name),
diff --git a/src/rust/provers/key.rs b/src/rust/provers/key.rs
index a8e16b0..d99f364 100644
--- a/src/rust/provers/key.rs
+++ b/src/rust/provers/key.rs
@@ -405,11 +405,11 @@ impl ProverBackend for KeyBackend {
 
         let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
 
-        match ext {
+        let mut state = match ext {
             "key" => {
                 // Parse .key proof problem file
                 let (goals, axioms) = self.parse_key_file(&content);
-                let state = ProofState {
+                ProofState {
                     goals: if goals.is_empty() {
                         vec![Goal {
                             id: "key_goal_0".to_string(),
@@ -424,13 +424,12 @@ impl ProverBackend for KeyBackend {
                         ..Default::default()
                     },
                     ..Default::default()
-                };
-                Ok(state)
+                }
             },
             "java" => {
                 // Parse JML-annotated Java source
                 let goals = self.parse_jml_annotations(&content);
-                let state = ProofState {
+                ProofState {
                     goals: if goals.is_empty() {
                         vec![Goal {
                             id: "java_goal_0".to_string(),
@@ -441,14 +440,24 @@ impl ProverBackend for KeyBackend {
                         goals
                     },
                     ..Default::default()
-                };
-                Ok(state)
+                }
             },
-            _ => Err(anyhow!(
-                "Unsupported file extension '{}' for KeY backend — expected .key or .java",
-                ext
-            )),
-        }
+            _ => {
+                return Err(anyhow!(
+                    "Unsupported file extension '{}' for KeY backend — expected .key or .java",
+                    ext
+                ));
+            },
+        };
+        state.metadata.insert(
+            "key_source".to_string(),
+            serde_json::Value::String(content.clone()),
+        );
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     /// Parse a proof problem from a raw string (assumes `.key` format)
@@ -482,7 +491,7 @@ impl ProverBackend for KeyBackend {
             goals
         };
 
-        let state = ProofState {
+        let mut state = ProofState {
             goals: computed_goals,
             context: crate::core::Context {
                 axioms,
@@ -490,6 +499,10 @@ impl ProverBackend for KeyBackend {
             },
             ..Default::default()
         };
+        state.metadata.insert(
+            "key_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
 
         Ok(state)
     }
@@ -612,7 +625,34 @@ impl ProverBackend for KeyBackend {
     /// Spawns the KeY prover in headless/batch mode, feeds it the proof problem,
     /// and parses the output for "Proof closed" (success) or "open goals" (failure).
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_key_format(state)?;
+        // Prefer the original .key/.java file — `to_key_format(state)` is
+        // lossy for anything beyond trivial goals.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .args(["--auto", "--no-gui"])
+                    .args(["--max-steps", &self.max_steps.to_string()])
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("KeY verification timed out")??;
+            let combined = format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            );
+            return self.parse_result(&combined);
+        }
+
+        let input = if let Some(src) = state.metadata.get("key_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_key_format(state)?
+        };
 
         let mut child = Command::new(&self.config.executable)
             .args(["--auto", "--no-gui"])
diff --git a/src/rust/provers/lean.rs b/src/rust/provers/lean.rs
index 04bcbf1..3e49779 100644
--- a/src/rust/provers/lean.rs
+++ b/src/rust/provers/lean.rs
@@ -1177,7 +1177,15 @@ impl ProverBackend for LeanBackend {
             },
         };
 
-        let state = self.build_proof_state(&declarations, &tactic_state);
+        let mut state = self.build_proof_state(&declarations, &tactic_state);
+        state.metadata.insert(
+            "lean_source".to_string(),
+            serde_json::Value::String(content.clone()),
+        );
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
 
         Ok(state)
     }
@@ -1216,7 +1224,11 @@ impl ProverBackend for LeanBackend {
             },
         };
 
-        let state = self.build_proof_state(&declarations, &tactic_state);
+        let mut state = self.build_proof_state(&declarations, &tactic_state);
+        state.metadata.insert(
+            "lean_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         Ok(state)
     }
 
@@ -1303,6 +1315,31 @@ impl ProverBackend for LeanBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original .lean file — the Term IR round-trip loses
+        // tactic structure that Lean's elaborator depends on.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let result = self.run_lean(&["--run", path]).await;
+            return match result {
+                Ok(_) => Ok(true),
+                Err(_) => Ok(false),
+            };
+        }
+        if let Some(source) = state.metadata.get("lean_source").and_then(|v| v.as_str()) {
+            let temp_file = std::env::temp_dir()
+                .join(format!("echidna_lean_verify_{}.lean", uuid::Uuid::new_v4()));
+            tokio::fs::write(&temp_file, source)
+                .await
+                .context("Failed to write temporary file")?;
+            let result = self
+                .run_lean(&["--run", &temp_file.to_string_lossy()])
+                .await;
+            let _ = tokio::fs::remove_file(&temp_file).await;
+            return match result {
+                Ok(_) => Ok(true),
+                Err(_) => Ok(false),
+            };
+        }
+
         if state.goals.is_empty() {
             return Ok(true);
         }
diff --git a/src/rust/provers/minizinc.rs b/src/rust/provers/minizinc.rs
index e5f5e5b..4d6f627 100644
--- a/src/rust/provers/minizinc.rs
+++ b/src/rust/provers/minizinc.rs
@@ -57,11 +57,20 @@ impl ProverBackend for MiniZincBackend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "minizinc_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("%") {
@@ -87,7 +96,34 @@ impl ProverBackend for MiniZincBackend {
         ))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("--solver")
+                    .arg("gecode")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("MiniZinc timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state
+            .metadata
+            .get("minizinc_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .arg("--solver")
             .arg("gecode")
diff --git a/src/rust/provers/minlog.rs b/src/rust/provers/minlog.rs
index 7e52b11..374f78a 100644
--- a/src/rust/provers/minlog.rs
+++ b/src/rust/provers/minlog.rs
@@ -43,11 +43,20 @@ impl ProverBackend for MinlogBackend {
         Ok("minlog".to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "minlog_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with(";;") {
@@ -71,7 +80,32 @@ impl ProverBackend for MinlogBackend {
         Err(anyhow::anyhow!("Minlog: use proof terms"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Minlog timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state
+            .metadata
+            .get("minlog_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/mizar.rs b/src/rust/provers/mizar.rs
index eaaa140..f0de9ce 100644
--- a/src/rust/provers/mizar.rs
+++ b/src/rust/provers/mizar.rs
@@ -422,11 +422,16 @@ impl ProverBackend for MizarBackend {
             });
         }
 
+        let mut metadata = HashMap::new();
+        metadata.insert(
+            "mizar_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         Ok(ProofState {
             goals,
             context,
             proof_script: vec![],
-            metadata: HashMap::new(),
+            metadata,
         })
     }
 
@@ -544,10 +549,11 @@ impl ProverBackend for MizarBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        if !state.is_complete() {
-            return Ok(false);
-        }
-
+        // Prefer the original .miz file — `export_to_mizar(state)` is
+        // lossy for anything beyond trivial theorems.  The previous
+        // `is_complete()` short-circuit produced spurious false results
+        // when parse_string left open goals (Mizar's own checker is the
+        // authority, not our IR).
         if let Some(serde_json::Value::String(path)) = state.metadata.get("source_path") {
             let path = Path::new(path);
             if path.exists() {
@@ -556,6 +562,22 @@ impl ProverBackend for MizarBackend {
             }
         }
 
+        if let Some(source) = state.metadata.get("mizar_source").and_then(|v| v.as_str()) {
+            let temp_dir = std::env::temp_dir();
+            let temp_file = temp_dir.join(format!("echidna_{}.miz", uuid::Uuid::new_v4()));
+            let mut file = fs::File::create(&temp_file).await?;
+            file.write_all(source.as_bytes()).await?;
+            file.sync_all().await?;
+            drop(file);
+            let result = self.verify_file(&temp_file).await?;
+            let _ = fs::remove_file(&temp_file).await;
+            return Ok(result.success);
+        }
+
+        if !state.is_complete() {
+            return Ok(false);
+        }
+
         let mizar_content = self.export_to_mizar(state)?;
 
         let temp_dir = std::env::temp_dir();
diff --git a/src/rust/provers/nuprl.rs b/src/rust/provers/nuprl.rs
index 39171e3..06b9c67 100644
--- a/src/rust/provers/nuprl.rs
+++ b/src/rust/provers/nuprl.rs
@@ -43,11 +43,20 @@ impl ProverBackend for NuprlBackend {
         Ok("nuprl".to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "nuprl_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("(*") {
@@ -71,7 +80,28 @@ impl ProverBackend for NuprlBackend {
         Err(anyhow::anyhow!("Nuprl: use refinement tactics"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Nuprl timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("nuprl_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/nusmv.rs b/src/rust/provers/nusmv.rs
index c464b40..b279a1c 100644
--- a/src/rust/provers/nusmv.rs
+++ b/src/rust/provers/nusmv.rs
@@ -234,14 +234,24 @@ impl ProverBackend for NuSMVBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read SMV file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_smv(content)
+        let mut state = self.parse_smv(content)?;
+        state.metadata.insert(
+            "nusmv_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -303,7 +313,29 @@ impl ProverBackend for NuSMVBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let smv_code = self.to_smv(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("NuSMV timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute NuSMV")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+        let smv_code = if let Some(src) = state.metadata.get("nusmv_source").and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_smv(state)?
+        };
 
         // Write SMV to a temporary file (nuXmv requires a file)
         let tmp_dir =
diff --git a/src/rust/provers/ortools.rs b/src/rust/provers/ortools.rs
index 929f960..3a9295b 100644
--- a/src/rust/provers/ortools.rs
+++ b/src/rust/provers/ortools.rs
@@ -43,11 +43,20 @@ impl ProverBackend for ORToolsBackend {
         Ok("or-tools".to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "ortools_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("//") {
@@ -73,7 +82,32 @@ impl ProverBackend for ORToolsBackend {
         ))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("OR-Tools timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state
+            .metadata
+            .get("ortools_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/prism.rs b/src/rust/provers/prism.rs
index 41c34cf..c26de4b 100644
--- a/src/rust/provers/prism.rs
+++ b/src/rust/provers/prism.rs
@@ -237,14 +237,24 @@ impl ProverBackend for PrismBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read PRISM file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_prism(content)
+        let mut state = self.parse_prism(content)?;
+        state.metadata.insert(
+            "prism_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -306,6 +316,49 @@ impl ProverBackend for PrismBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
+        // Prefer the original .pm/.prism file — reconstructing both
+        // model and properties from the Term IR is lossy.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("PRISM timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute PRISM")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+        if let Some(src) = state.metadata.get("prism_source").and_then(|v| v.as_str()) {
+            let tmp_dir =
+                tempfile::tempdir().context("Failed to create temporary directory for PRISM")?;
+            let model_file = tmp_dir.path().join("model.pm");
+            tokio::fs::write(&model_file, src)
+                .await
+                .context("Failed to write temporary PRISM model file")?;
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg(&model_file)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("PRISM timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute PRISM")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+
         let model_code = self.to_prism_model(state)?;
         let props_code = self.to_prism_properties(state);
 
diff --git a/src/rust/provers/proverif.rs b/src/rust/provers/proverif.rs
index ef82793..721a6c5 100644
--- a/src/rust/provers/proverif.rs
+++ b/src/rust/provers/proverif.rs
@@ -434,14 +434,24 @@ impl ProverBackend for ProVerifBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read .pv file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_pv(content)
+        let mut state = self.parse_pv(content)?;
+        state.metadata.insert(
+            "proverif_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, _state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -455,7 +465,37 @@ impl ProverBackend for ProVerifBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let pv_code = self.to_pv(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 10),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| {
+                anyhow!(
+                    "ProVerif verification timed out after {} seconds",
+                    self.config.timeout
+                )
+            })?
+            .context("Failed to execute proverif")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+        let pv_code = if let Some(src) = state
+            .metadata
+            .get("proverif_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_pv(state)?
+        };
 
         // Write .pv to a temporary file (proverif requires a file path)
         let tmp_dir =
diff --git a/src/rust/provers/scip.rs b/src/rust/provers/scip.rs
index 02c2815..eeec392 100644
--- a/src/rust/provers/scip.rs
+++ b/src/rust/provers/scip.rs
@@ -51,11 +51,20 @@ impl ProverBackend for SCIPBackend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "scip_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("\\") {
@@ -79,7 +88,29 @@ impl ProverBackend for SCIPBackend {
         Err(anyhow::anyhow!("SCIP: not an interactive prover"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("-f")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("SCIP timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("scip_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/seahorn.rs b/src/rust/provers/seahorn.rs
index f5b5f68..83adeed 100644
--- a/src/rust/provers/seahorn.rs
+++ b/src/rust/provers/seahorn.rs
@@ -369,13 +369,13 @@ impl ProverBackend for SeaHornBackend {
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
         let extension = path.extension().and_then(|e| e.to_str()).unwrap_or("");
 
-        match extension {
+        let mut state = match extension {
             "c" | "h" => {
                 // Parse C source to extract assertions and assumptions
                 let content = tokio::fs::read_to_string(&path)
                     .await
                     .context("Failed to read C source file for SeaHorn")?;
-                self.parse_string(&content).await
+                self.parse_string(&content).await?
             },
             "bc" | "ll" => {
                 // LLVM bitcode / IR — we cannot parse assertions directly,
@@ -389,17 +389,29 @@ impl ProverBackend for SeaHornBackend {
                     "seahorn_input_type".to_string(),
                     serde_json::Value::String(extension.to_string()),
                 );
-                Ok(state)
+                state
             },
-            _ => Err(anyhow!(
-                "SeaHorn: unsupported file extension '.{}' (expected .c, .bc, or .ll)",
-                extension
-            )),
-        }
+            _ => {
+                return Err(anyhow!(
+                    "SeaHorn: unsupported file extension '.{}' (expected .c, .bc, or .ll)",
+                    extension
+                ));
+            },
+        };
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_c_source(content)
+        let mut state = self.parse_c_source(content)?;
+        state.metadata.insert(
+            "seahorn_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -520,11 +532,25 @@ impl ProverBackend for SeaHornBackend {
         let tmp_dir =
             tempfile::tempdir().context("Failed to create temporary directory for SeaHorn")?;
 
-        let input_path = if let Some(serde_json::Value::String(file_path)) =
+        let input_path = if let Some(path) =
+            state.metadata.get("source_path").and_then(|v| v.as_str())
+        {
+            PathBuf::from(path)
+        } else if let Some(serde_json::Value::String(file_path)) =
             state.metadata.get("seahorn_input_file")
         {
             // Use the referenced input file directly (bitcode or C source)
             PathBuf::from(file_path)
+        } else if let Some(src) = state
+            .metadata
+            .get("seahorn_source")
+            .and_then(|v| v.as_str())
+        {
+            let tmp_file = tmp_dir.path().join("program.c");
+            tokio::fs::write(&tmp_file, src)
+                .await
+                .context("Failed to write temporary C file for SeaHorn")?;
+            tmp_file
         } else {
             // Generate C source from proof state
             let c_source = self.to_c_source(state)?;
diff --git a/src/rust/provers/spin_checker.rs b/src/rust/provers/spin_checker.rs
index d44944a..711c17c 100644
--- a/src/rust/provers/spin_checker.rs
+++ b/src/rust/provers/spin_checker.rs
@@ -218,14 +218,24 @@ impl ProverBackend for SpinBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read Promela file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_promela(content)
+        let mut state = self.parse_promela(content)?;
+        state.metadata.insert(
+            "spin_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -288,7 +298,35 @@ impl ProverBackend for SpinBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let promela_code = self.to_promela(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 10),
+                Command::new(&self.config.executable)
+                    .arg("-run")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| {
+                anyhow!(
+                    "SPIN verification timed out after {} seconds",
+                    self.config.timeout
+                )
+            })?
+            .context("Failed to execute SPIN")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+        let promela_code = if let Some(src) = state.metadata.get("spin_source").and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_promela(state)?
+        };
 
         // Write Promela to a temporary file (spin -run requires a file)
         let tmp_dir =
diff --git a/src/rust/provers/tamarin.rs b/src/rust/provers/tamarin.rs
index c4a1dfb..c507dc8 100644
--- a/src/rust/provers/tamarin.rs
+++ b/src/rust/provers/tamarin.rs
@@ -286,14 +286,24 @@ impl ProverBackend for TamarinBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read .spthy file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_spthy(content)
+        let mut state = self.parse_spthy(content)?;
+        state.metadata.insert(
+            "tamarin_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -370,7 +380,38 @@ impl ProverBackend for TamarinBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let spthy_code = self.to_spthy(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 10),
+                Command::new(&self.config.executable)
+                    .arg("--prove")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| {
+                anyhow!(
+                    "Tamarin verification timed out after {} seconds",
+                    self.config.timeout
+                )
+            })?
+            .context("Failed to execute tamarin-prover")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+        let spthy_code = if let Some(src) = state
+            .metadata
+            .get("tamarin_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_spthy(state)?
+        };
 
         // Write .spthy to a temporary file (tamarin-prover requires a file)
         let tmp_dir =
diff --git a/src/rust/provers/tlaps.rs b/src/rust/provers/tlaps.rs
index 5d2fdd2..f4ef4a1 100644
--- a/src/rust/provers/tlaps.rs
+++ b/src/rust/provers/tlaps.rs
@@ -53,11 +53,20 @@ impl ProverBackend for TLAPSBackend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "tlaps_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if l.is_empty()
@@ -88,7 +97,28 @@ impl ProverBackend for TLAPSBackend {
         Err(anyhow::anyhow!("TLAPS: use OBVIOUS/BY/QED"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("TLAPS timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("tlaps_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/tlc.rs b/src/rust/provers/tlc.rs
index 580bdaa..2b7463b 100644
--- a/src/rust/provers/tlc.rs
+++ b/src/rust/provers/tlc.rs
@@ -224,14 +224,24 @@ impl ProverBackend for TLCBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read TLA+ file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_tla(content)
+        let mut state = self.parse_tla(content)?;
+        state.metadata.insert(
+            "tlc_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -294,7 +304,31 @@ impl ProverBackend for TLCBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let tla_code = self.to_tla(state)?;
+        // Prefer the original .tla file — `to_tla(state)` is lossy.
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("TLC timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute TLC")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+
+        let tla_code = if let Some(src) = state.metadata.get("tlc_source").and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_tla(state)?
+        };
         let cfg_code = self.to_cfg(state);
 
         // Write TLA+ and CFG to temporary files
diff --git a/src/rust/provers/twelf.rs b/src/rust/provers/twelf.rs
index 03b3dbe..2c5cff7 100644
--- a/src/rust/provers/twelf.rs
+++ b/src/rust/provers/twelf.rs
@@ -43,11 +43,20 @@ impl ProverBackend for TwelfBackend {
         Ok("twelf".to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "twelf_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if !l.is_empty() && !l.starts_with("%") {
@@ -71,7 +80,28 @@ impl ProverBackend for TwelfBackend {
         Err(anyhow::anyhow!("Twelf: use LF declarations"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Twelf timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("twelf_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .stdin(Stdio::piped())
             .stdout(Stdio::piped())
diff --git a/src/rust/provers/uppaal.rs b/src/rust/provers/uppaal.rs
index 0716b43..1ecc4a3 100644
--- a/src/rust/provers/uppaal.rs
+++ b/src/rust/provers/uppaal.rs
@@ -249,14 +249,24 @@ impl ProverBackend for UppaalBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read UPPAAL XML file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
-        self.parse_uppaal_xml(content)
+        let mut state = self.parse_uppaal_xml(content)?;
+        state.metadata.insert(
+            "uppaal_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
+        Ok(state)
     }
 
     async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result<TacticResult> {
@@ -322,7 +332,32 @@ impl ProverBackend for UppaalBackend {
     }
 
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let xml_code = self.to_uppaal_xml(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout),
+                Command::new(&self.config.executable)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .map_err(|_| anyhow!("UPPAAL timed out after {} seconds", self.config.timeout))?
+            .context("Failed to execute verifyta")?;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            let combined = format!("{}\n{}", stdout, stderr);
+            return self.parse_result(&combined);
+        }
+        let xml_code = if let Some(src) = state
+            .metadata
+            .get("uppaal_source")
+            .and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_uppaal_xml(state)?
+        };
         let query_code = self.to_query_file(state);
 
         // Write model and queries to temporary files
diff --git a/src/rust/provers/viper.rs b/src/rust/provers/viper.rs
index 80dff15..515d9c9 100644
--- a/src/rust/provers/viper.rs
+++ b/src/rust/provers/viper.rs
@@ -134,10 +134,15 @@ impl ProverBackend for ViperBackend {
     }
 
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        let content = tokio::fs::read_to_string(path)
+        let content = tokio::fs::read_to_string(&path)
             .await
             .context("Failed to read proof file")?;
-        self.parse_string(&content).await
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
 
     /// Parse a Silver (.vpr) program into a ProofState.
@@ -146,6 +151,10 @@ impl ProverBackend for ViperBackend {
     /// into goals. Comments (// and /* */) are skipped.
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "viper_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
 
         for line in content.lines() {
             let line = line.trim();
@@ -220,7 +229,32 @@ impl ProverBackend for ViperBackend {
     /// the configured timeout. Both Silicon and Carbon are supported via
     /// the `--backend` flag (configurable through extra args).
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let silver_code = self.to_silver(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("--timeout")
+                    .arg(format!("{}", self.config.timeout))
+                    .args(&self.config.args)
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Viper timed out")??;
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            return self
+                .parse_result(&stdout)
+                .or_else(|_| self.parse_result(&stderr));
+        }
+        let silver_code = if let Some(src) = state.metadata.get("viper_source").and_then(|v| v.as_str())
+        {
+            src.to_string()
+        } else {
+            self.to_silver(state)?
+        };
 
         let mut child = Command::new(&self.config.executable)
             .arg("--timeout")
diff --git a/src/rust/provers/why3.rs b/src/rust/provers/why3.rs
index c136b16..3c75838 100644
--- a/src/rust/provers/why3.rs
+++ b/src/rust/provers/why3.rs
@@ -56,11 +56,20 @@ impl ProverBackend for Why3Backend {
             .to_string())
     }
     async fn parse_file(&self, path: PathBuf) -> Result<ProofState> {
-        self.parse_string(&tokio::fs::read_to_string(path).await?)
-            .await
+        let content = tokio::fs::read_to_string(&path).await?;
+        let mut state = self.parse_string(&content).await?;
+        state.metadata.insert(
+            "source_path".to_string(),
+            serde_json::Value::String(path.to_string_lossy().into_owned()),
+        );
+        Ok(state)
     }
     async fn parse_string(&self, content: &str) -> Result<ProofState> {
         let mut state = ProofState::default();
+        state.metadata.insert(
+            "why3_source".to_string(),
+            serde_json::Value::String(content.to_string()),
+        );
         for line in content.lines() {
             let l = line.trim();
             if l.is_empty() || l.starts_with("(*") {
@@ -89,7 +98,31 @@ impl ProverBackend for Why3Backend {
         Err(anyhow::anyhow!("Why3: use built-in transformations"))
     }
     async fn verify_proof(&self, state: &ProofState) -> Result<bool> {
-        let input = self.to_input_format(state)?;
+        if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) {
+            let output = tokio::time::timeout(
+                tokio::time::Duration::from_secs(self.config.timeout + 5),
+                Command::new(&self.config.executable)
+                    .arg("prove")
+                    .arg("-P")
+                    .arg("z3")
+                    .arg(path)
+                    .stdout(Stdio::piped())
+                    .stderr(Stdio::piped())
+                    .output(),
+            )
+            .await
+            .context("Why3 timed out")??;
+            return self.parse_result(&format!(
+                "{}\n{}",
+                String::from_utf8_lossy(&output.stdout),
+                String::from_utf8_lossy(&output.stderr)
+            ));
+        }
+        let input = if let Some(src) = state.metadata.get("why3_source").and_then(|v| v.as_str()) {
+            src.to_string()
+        } else {
+            self.to_input_format(state)?
+        };
         let mut child = Command::new(&self.config.executable)
             .arg("prove")
             .arg("-P")