diff --git a/.containerization/Containerfile.mcp b/.containerization/Containerfile.mcp new file mode 100644 index 0000000..da2b3ed --- /dev/null +++ b/.containerization/Containerfile.mcp @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: PMPL-1.0-or-later +# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell +# +# ECHIDNA MCP Container — exposes ECHIDNA as a Model Context Protocol server +# over stdio so any MCP-capable AI agent (Claude Code, Claude Desktop, Claude +# API with tool use, other clients) can call `prove({file, prover, timeout_secs})` +# and get a real solver verdict. +# +# Bundled MVP prover set: Z3, CVC5, Coq, Agda, Metamath, SPASS (all apt), plus +# Lean 4 (via elan), Isabelle, F*, and Mizar where upstream distributions +# cooperate with an unprivileged build environment. +# +# Build: podman build -f .containerization/Containerfile.mcp -t echidna:mcp . +# Run: podman run --rm -i echidna:mcp # stdio MCP transport +# Use: {"mcpServers":{"echidna":{"command":"podman", +# "args":["run","--rm","-i","echidna:mcp"]}}} + +# ============================================================================= +# Stage 1: Rust builder — echidna + echidna-mcp + echidna-wire + typed_wasm +# ============================================================================= +FROM docker.io/library/rust:1.85-slim-bookworm AS rust-builder + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + pkg-config \ + libssl-dev \ + capnproto \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /build + +COPY Cargo.toml Cargo.lock ./ +COPY src/rust ./src/rust +COPY src/interfaces ./src/interfaces +COPY crates ./crates +COPY benches ./benches + +# Build both the CLI (echidna) and the MCP server (echidna-mcp). The MCP +# server shells out to the CLI via the ECHIDNA_BIN env var. 
+RUN cargo build --release --bin echidna --bin echidna-mcp + +# ============================================================================= +# Stage 2: MVP prover installer (Debian apt-available set) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS mvp-installer + +RUN apt-get update && apt-get install -y --no-install-recommends \ + z3 \ + cvc5 \ + coq \ + agda-bin \ + metamath \ + spass \ + && rm -rf /var/lib/apt/lists/* + +# ============================================================================= +# Stage 3: Lean 4 installer (elan + stable toolchain) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS lean-installer +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates bash \ + && rm -rf /var/lib/apt/lists/* +RUN curl -fsSL https://raw.githubusercontent.com/leanprover/elan/master/elan-init.sh \ + | bash -s -- -y --default-toolchain stable \ + && . 
"$HOME/.elan/env" \ + && lean --version + +# ============================================================================= +# Stage 4: Isabelle 2025 installer (tarball + pre-built Main heap) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS isabelle-installer +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates perl fontconfig \ + && rm -rf /var/lib/apt/lists/* +RUN curl -fsSL --retry 3 \ + "https://isabelle.in.tum.de/dist/Isabelle2025-2_linux.tar.gz" \ + -o /tmp/isabelle.tar.gz \ + && tar -xzf /tmp/isabelle.tar.gz -C /opt/ \ + && rm /tmp/isabelle.tar.gz \ + && rm -rf /opt/Isabelle2025-2/doc \ + && /opt/Isabelle2025-2/bin/isabelle version \ + && /opt/Isabelle2025-2/bin/isabelle build -b Main \ + && rm -rf /root/.isabelle/Isabelle2025-2/log \ + /root/.isabelle/Isabelle2025-2/browser_info + +# ============================================================================= +# Stage 5: F* installer (binary release) +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim AS fstar-installer +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates jq tar gzip \ + && rm -rf /var/lib/apt/lists/* +# Resolve the latest F* Linux x86_64 release tarball via the GitHub API +# (asset naming varies between releases; don't hard-code a version). 
+RUN set -eu; \ + url=$(curl -fsSL https://api.github.com/repos/FStarLang/FStar/releases/latest \ + | jq -r '.assets[]|select(.name|test("[Ll]inux.*x86_64"))|.browser_download_url' \ + | head -1); \ + if [ -n "$url" ]; then \ + curl -fsSL "$url" -o /tmp/fstar.tar.gz \ + && mkdir -p /opt/fstar \ + && tar -xzf /tmp/fstar.tar.gz -C /opt/fstar --strip-components=1 \ + && /opt/fstar/bin/fstar.exe --version || true; \ + else \ + mkdir -p /opt/fstar/bin; \ + echo "WARN: no F* linux asset found; stage empty" >&2; \ + fi + +# ============================================================================= +# Stage 6: Runtime image +# ============================================================================= +FROM docker.io/library/debian:bookworm-slim + +LABEL maintainer="Jonathan D.A. Jewell " +LABEL org.opencontainers.image.source="https://github.com/hyperpolymath/echidna" +LABEL org.opencontainers.image.description="ECHIDNA MCP server — AI-agent-callable theorem proving over stdio" +LABEL org.opencontainers.image.licenses="PMPL-1.0-or-later" +LABEL org.opencontainers.image.version="2.1.0" + +# Runtime deps: the MVP provers + dynamic libs + perl/fontconfig for Isabelle. 
+RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + libssl3 \ + libgmp10 \ + libstdc++6 \ + perl \ + fontconfig \ + z3 \ + cvc5 \ + coq \ + agda-bin \ + metamath \ + spass \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +COPY --from=rust-builder /build/target/release/echidna /app/bin/echidna +COPY --from=rust-builder /build/target/release/echidna-mcp /app/bin/echidna-mcp +COPY --from=lean-installer /root/.elan /opt/elan +COPY --from=isabelle-installer /opt/Isabelle2025-2 /opt/Isabelle2025-2 +COPY --from=isabelle-installer /root/.isabelle /root/.isabelle +COPY --from=fstar-installer /opt/fstar /opt/fstar + +ENV PATH="/app/bin:/opt/elan/toolchains/stable/bin:/opt/Isabelle2025-2/bin:/opt/fstar/bin:${PATH}" +ENV ECHIDNA_BIN="/app/bin/echidna" + +# Stdio transport — no ports. The client connects via podman run -i. +ENTRYPOINT ["/app/bin/echidna-mcp"] diff --git a/src/rust/provers/abc.rs b/src/rust/provers/abc.rs index da267f9..1a7f8b1 100644 --- a/src/rust/provers/abc.rs +++ b/src/rust/provers/abc.rs @@ -414,20 +414,34 @@ impl ProverBackend for AbcBackend { .context("Failed to read ABC input file")?; let extension = path.extension().and_then(|e| e.to_str()); - self.parse_design(&content, extension) + let mut state = self.parse_design(&content, extension)?; + state.metadata.insert( + "abc_source".to_string(), + serde_json::Value::String(content.clone()), + ); + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { // Without file extension context, try ABC command script first, // then fall back to AIGER ASCII if header starts with "aag"/"aig" let trimmed = content.trim(); - if trimmed.starts_with("aag") || trimmed.starts_with("aig") { - self.parse_design(content, Some("aig")) + let mut state = if trimmed.starts_with("aag") || trimmed.starts_with("aig") { + self.parse_design(content, 
Some("aig"))? } else if trimmed.starts_with(".model") || trimmed.starts_with(".inputs") { - self.parse_design(content, Some("blif")) + self.parse_design(content, Some("blif"))? } else { - self.parse_design(content, None) - } + self.parse_design(content, None)? + }; + state.metadata.insert( + "abc_source".to_string(), + serde_json::Value::String(content.to_string()), + ); + Ok(state) } async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result { @@ -567,6 +581,131 @@ impl ProverBackend for AbcBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Prefer the original source — parsing ABC/BLIF/AIGER into the + // generic Term IR and reconstructing via `to_abc_script` loses + // structure for anything beyond a one-command script. + let method = state + .metadata + .get("abc_method") + .and_then(|v| v.as_str()) + .unwrap_or("pdr"); + let bmc_bound = state + .metadata + .get("bmc_bound") + .and_then(|v| v.as_str()) + .unwrap_or("100"); + let verify_cmd = match method { + "bmc" | "bmc3" => format!("bmc3 -F {}", bmc_bound), + "int" => "int".to_string(), + "dprove" => "dprove".to_string(), + "dsec" => "dsec".to_string(), + "cec" => "cec".to_string(), + _ => "pdr".to_string(), + }; + + if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let p = std::path::Path::new(path); + let read_cmd = match p.extension().and_then(|e| e.to_str()) { + Some("aig") => format!("read_aiger {}", path), + Some("blif") => format!("read_blif {}", path), + Some("v") => format!("read_verilog {}", path), + _ => format!("read {}", path), + }; + let script = format!("{}\n{}\nprint_status\nquit\n", read_cmd, verify_cmd); + let tmp_dir = + tempfile::tempdir().context("Failed to create temporary directory for ABC")?; + let tmp_file = tmp_dir.path().join("verify.abc"); + tokio::fs::write(&tmp_file, &script) + .await + .context("Failed to write temporary ABC script file")?; + let output = tokio::time::timeout( + 
tokio::time::Duration::from_secs(self.config.timeout), + Command::new(&self.config.executable) + .arg("-f") + .arg(&tmp_file) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))? + .context("Failed to execute ABC")?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}\n{}", stdout, stderr); + return self.parse_result(&combined); + } + + if let Some(source) = state.metadata.get("abc_source").and_then(|v| v.as_str()) { + let trimmed = source.trim(); + let (ext, read_cmd_base) = + if trimmed.starts_with("aag") || trimmed.starts_with("aig") { + (".aig", "read_aiger") + } else if trimmed.starts_with(".model") || trimmed.starts_with(".inputs") { + (".blif", "read_blif") + } else { + // Inline script — just execute it directly. + ("", "") + }; + let tmp_dir = + tempfile::tempdir().context("Failed to create temporary directory for ABC")?; + if ext.is_empty() { + let tmp_file = tmp_dir.path().join("verify.abc"); + let script = format!("{}\n{}\nprint_status\nquit\n", source, verify_cmd); + tokio::fs::write(&tmp_file, &script) + .await + .context("Failed to write temporary ABC script file")?; + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout), + Command::new(&self.config.executable) + .arg("-f") + .arg(&tmp_file) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))? 
+ .context("Failed to execute ABC")?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}\n{}", stdout, stderr); + return self.parse_result(&combined); + } else { + let design_file = tmp_dir.path().join(format!("design{}", ext)); + tokio::fs::write(&design_file, source) + .await + .context("Failed to write temporary ABC design file")?; + let script_file = tmp_dir.path().join("verify.abc"); + let script = format!( + "{} {}\n{}\nprint_status\nquit\n", + read_cmd_base, + design_file.display(), + verify_cmd + ); + tokio::fs::write(&script_file, &script) + .await + .context("Failed to write temporary ABC script file")?; + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout), + Command::new(&self.config.executable) + .arg("-f") + .arg(&script_file) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .map_err(|_| anyhow!("ABC timed out after {} seconds", self.config.timeout))? + .context("Failed to execute ABC")?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}\n{}", stdout, stderr); + return self.parse_result(&combined); + } + } + + // Fallback: reconstruct an ABC script from the parsed IR. let abc_script = self.to_abc_script(state)?; // ABC can execute commands via -c flag or from a script file. 
diff --git a/src/rust/provers/acl2.rs b/src/rust/provers/acl2.rs index d68fa90..3d563dc 100644 --- a/src/rust/provers/acl2.rs +++ b/src/rust/provers/acl2.rs @@ -1239,7 +1239,12 @@ impl ProverBackend for ACL2Backend { .await .context("Failed to read ACL2 file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -1310,11 +1315,16 @@ impl ProverBackend for ACL2Backend { } } + let mut metadata = HashMap::new(); + metadata.insert( + "acl2_source".to_string(), + serde_json::Value::String(content.to_string()), + ); Ok(ProofState { goals, context, proof_script: vec![], - metadata: HashMap::new(), + metadata, }) } @@ -1380,6 +1390,47 @@ impl ProverBackend for ACL2Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { + let exec = self + .config + .executable + .to_str() + .filter(|s| !s.is_empty()) + .unwrap_or("acl2"); + + // Prefer the original Lisp source — ACL2's `term_to_sexp` + // round-trip is lossy for anything beyond trivial arithmetic. 
+ let source_file: Option = + if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + Some(PathBuf::from(p)) + } else if let Some(src) = state.metadata.get("acl2_source").and_then(|v| v.as_str()) { + let tmp = std::env::temp_dir() + .join(format!("echidna_acl2_verify_{}.lisp", uuid::Uuid::new_v4())); + tokio::fs::write(&tmp, src) + .await + .context("Failed to write temp file")?; + Some(tmp) + } else { + None + }; + + if let Some(path) = source_file { + let stdin_file = std::fs::File::open(&path) + .context("Failed to open source file for ACL2 stdin")?; + let output = Command::new(exec) + .stdin(std::process::Stdio::from(stdin_file)) + .output() + .await + .context("Failed to run ACL2")?; + if state.metadata.get("source_path").is_none() { + let _ = tokio::fs::remove_file(&path).await; + } + let output_str = String::from_utf8_lossy(&output.stdout); + let err_str = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}{}", output_str, err_str); + return Ok(combined.contains("Q.E.D.") + || (combined.contains("Summary") && !combined.contains("FAILED"))); + } + if state.goals.is_empty() { return Ok(true); } @@ -1394,14 +1445,6 @@ impl ProverBackend for ACL2Backend { .await .context("Failed to write temp file")?; - // Run ACL2 in batch mode - let exec = self - .config - .executable - .to_str() - .filter(|s| !s.is_empty()) - .unwrap_or("acl2"); - let stdin_file = std::fs::File::open(&temp_file).context("Failed to open temp file for ACL2 stdin")?; diff --git a/src/rust/provers/agda.rs b/src/rust/provers/agda.rs index 2346ccf..b58c4da 100644 --- a/src/rust/provers/agda.rs +++ b/src/rust/provers/agda.rs @@ -255,7 +255,12 @@ impl ProverBackend for AgdaBackend { let content = tokio::fs::read_to_string(&path) .await .context("Failed to read Agda file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + 
serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -299,11 +304,16 @@ impl ProverBackend for AgdaBackend { context.theorems = theorems; let goals = self.extract_goals(content).await?; + let mut metadata = HashMap::new(); + metadata.insert( + "agda_source".to_string(), + serde_json::Value::String(content.to_string()), + ); Ok(ProofState { goals, context, proof_script: Vec::new(), - metadata: HashMap::new(), + metadata, }) } @@ -349,6 +359,51 @@ impl ProverBackend for AgdaBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Prefer the original .agda file when parse_file stashed its path. + // The previous `state.goals.is_empty() → Ok(true)` shortcut produced + // fake-success — Agda's parser doesn't populate goals the way the + // generic pipeline expects, so genuine files hit it without the + // solver ever running. + // + // Agda checks that the top-level module name matches the file path + // *relative to its CWD*, so we spawn the process from the file's + // parent directory — otherwise every non-trivial file trips + // "module … does not match file name". 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let p = std::path::Path::new(path); + let mut cmd = Command::new(&self.config.executable); + if let Some(parent) = p.parent() { + cmd.current_dir(parent); + } + let output = cmd + .arg(p.file_name().map(std::path::Path::new).unwrap_or(p)) + .output() + .await?; + return Ok(output.status.success()); + } + if let Some(source) = state.metadata.get("agda_source").and_then(|v| v.as_str()) { + let temp_file = std::env::temp_dir() + .join(format!("echidna_agda_verify_{}.agda", uuid::Uuid::new_v4())); + tokio::fs::write(&temp_file, source).await?; + let mut cmd = Command::new(&self.config.executable); + if let Some(parent) = temp_file.parent() { + cmd.current_dir(parent); + } + let output = cmd + .arg( + temp_file + .file_name() + .map(std::path::Path::new) + .unwrap_or(&temp_file), + ) + .output() + .await?; + let _ = tokio::fs::remove_file(&temp_file).await; + return Ok(output.status.success()); + } + + // Fallback: reconstruct from IR (lossy). Only hit for synthetic + // states never backed by real source. 
if state.goals.is_empty() { return Ok(true); } diff --git a/src/rust/provers/altergo.rs b/src/rust/provers/altergo.rs index 60178d3..160cedb 100644 --- a/src/rust/provers/altergo.rs +++ b/src/rust/provers/altergo.rs @@ -112,14 +112,23 @@ impl ProverBackend for AltErgoBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "altergo_source".to_string(), + serde_json::Value::String(content.to_string()), + ); for line in content.lines() { let line = line.trim(); @@ -154,19 +163,36 @@ impl ProverBackend for AltErgoBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let ae_code = self.to_altergo(state)?; - - // Write to temp file since Alt-Ergo prefers file input - let tmp_dir = std::env::temp_dir(); - let tmp_file = tmp_dir.join("echidna_altergo_input.ae"); - tokio::fs::write(&tmp_file, ae_code.as_bytes()) - .await - .context("Failed to write temp file")?; + // Prefer the original .ae source — `to_altergo(state)` round-trips + // Alt-Ergo syntax through the generic Term IR, which mangles + // anything beyond toy inputs. 
+ let (path_to_check, is_temp): (PathBuf, bool) = + if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + (PathBuf::from(p), false) + } else if let Some(src) = state + .metadata + .get("altergo_source") + .and_then(|v| v.as_str()) + { + let tmp = std::env::temp_dir() + .join(format!("echidna_altergo_{}.ae", uuid::Uuid::new_v4())); + tokio::fs::write(&tmp, src) + .await + .context("Failed to write temp file")?; + (tmp, true) + } else { + let ae_code = self.to_altergo(state)?; + let tmp = std::env::temp_dir().join("echidna_altergo_input.ae"); + tokio::fs::write(&tmp, ae_code.as_bytes()) + .await + .context("Failed to write temp file")?; + (tmp, true) + }; let output = tokio::time::timeout( tokio::time::Duration::from_secs(self.config.timeout + 5), Command::new(&self.config.executable) - .arg(&tmp_file) + .arg(&path_to_check) .arg("--timelimit") .arg(format!("{}", self.config.timeout)) .output(), @@ -174,8 +200,9 @@ impl ProverBackend for AltErgoBackend { .await .context("Alt-Ergo timed out")??; - // Clean up temp file - let _ = tokio::fs::remove_file(&tmp_file).await; + if is_temp { + let _ = tokio::fs::remove_file(&path_to_check).await; + } let stdout = String::from_utf8_lossy(&output.stdout); self.parse_result(&stdout) diff --git a/src/rust/provers/coq.rs b/src/rust/provers/coq.rs index e36c81d..0b2408a 100644 --- a/src/rust/provers/coq.rs +++ b/src/rust/provers/coq.rs @@ -834,7 +834,12 @@ impl ProverBackend for CoqBackend { .await .context("Failed to read file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -891,11 +896,16 @@ impl ProverBackend for CoqBackend { } } + let mut metadata = HashMap::new(); + metadata.insert( + "coq_source".to_string(), + serde_json::Value::String(content.to_string()), + 
); Ok(ProofState { goals, context, proof_script: proof_tactics, - metadata: HashMap::new(), + metadata, }) } @@ -922,10 +932,36 @@ impl ProverBackend for CoqBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - // Export the proof and try to check it - let proof_script = self.export(state).await?; + // Prefer the original file when parse_file stashed its path — runs + // coqc on exactly what the user wrote, avoiding the lossy export + // round-trip (the Coq parser's Term IR is incomplete for anything + // beyond trivial syntax). + if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = Command::new("coqc") + .arg(path) + .output() + .await + .context("Failed to run coqc")?; + return Ok(output.status.success()); + } + if let Some(source) = state.metadata.get("coq_source").and_then(|v| v.as_str()) { + let temp_file = std::env::temp_dir() + .join(format!("echidna_coq_verify_{}.v", uuid::Uuid::new_v4())); + tokio::fs::write(&temp_file, source) + .await + .context("Failed to write temp file")?; + let output = Command::new("coqc") + .arg(&temp_file) + .output() + .await + .context("Failed to run coqc")?; + let _ = tokio::fs::remove_file(&temp_file).await; + return Ok(output.status.success()); + } - // Write to temporary file and check with coqc + // Fallback: reconstruct from IR. Lossy; only hit for synthetic states + // that were never parsed from real Coq source. 
+ let proof_script = self.export(state).await?; let temp_file = std::env::temp_dir().join("echidna_coq_verify.v"); tokio::fs::write(&temp_file, proof_script) .await diff --git a/src/rust/provers/cvc5.rs b/src/rust/provers/cvc5.rs index 31aa957..0f53127 100644 --- a/src/rust/provers/cvc5.rs +++ b/src/rust/provers/cvc5.rs @@ -547,11 +547,21 @@ impl ProverBackend for CVC5Backend { let content = tokio::fs::read_to_string(&path) .await .context(format!("Failed to read file: {:?}", path))?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { - self.parse_smtlib_content(content).await + let mut state = self.parse_smtlib_content(content).await?; + state.metadata.insert( + "cvc5_source".to_string(), + serde_json::Value::String(content.to_string()), + ); + Ok(state) } async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result { @@ -603,21 +613,30 @@ impl ProverBackend for CVC5Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { - if state.goals.is_empty() { - return Ok(true); - } - - let mut commands = String::new(); - commands.push_str("(set-logic ALL)\n"); - for var in &state.context.variables { - let ty = self.term_to_smtlib(&var.ty)?; - commands.push_str(&format!("(declare-const {} {})\n", var.name, ty)); - } - for goal in &state.goals { - let smtlib = self.term_to_smtlib(&goal.target)?; - commands.push_str(&format!("(assert (not {}))\n", smtlib)); - } - commands.push_str("(check-sat)\n"); + // Prefer the original SMT-LIB source when parse_file / parse_string + // stashed it. Skipping the Term IR round-trip is the only way to + // keep non-trivial CVC5 inputs intact. 
+ let commands = if let Some(source) = + state.metadata.get("cvc5_source").and_then(|v| v.as_str()) + { + source.to_string() + } else { + if state.goals.is_empty() { + return Ok(true); + } + let mut s = String::new(); + s.push_str("(set-logic ALL)\n"); + for var in &state.context.variables { + let ty = self.term_to_smtlib(&var.ty)?; + s.push_str(&format!("(declare-const {} {})\n", var.name, ty)); + } + for goal in &state.goals { + let smtlib = self.term_to_smtlib(&goal.target)?; + s.push_str(&format!("(assert (not {}))\n", smtlib)); + } + s.push_str("(check-sat)\n"); + s + }; let temp_file = std::env::temp_dir().join(format!("echidna_cvc5_verify_{}.smt2", Uuid::new_v4())); diff --git a/src/rust/provers/eprover.rs b/src/rust/provers/eprover.rs index 75ccfb2..c08a074 100644 --- a/src/rust/provers/eprover.rs +++ b/src/rust/provers/eprover.rs @@ -96,14 +96,23 @@ impl ProverBackend for EProverBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "eprover_source".to_string(), + serde_json::Value::String(content.to_string()), + ); for line in content.lines() { let line = line.trim(); @@ -137,7 +146,37 @@ impl ProverBackend for EProverBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let tptp_code = self.to_tptp(state)?; + // Prefer the original TPTP source when parse_file / parse_string + // stashed it. `to_tptp(state)` round-trips through the generic + // Term IR and silently mangles anything beyond toy axioms. 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout + 5), + Command::new(&self.config.executable) + .arg("--auto") + .arg("--tptp3-format") + .arg("--cpu-limit") + .arg(format!("{}", self.config.timeout)) + .arg("--proof-object=0") + .arg(path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .context("E Prover timed out")??; + let stdout = String::from_utf8_lossy(&output.stdout); + return self.parse_result(&stdout); + } + let tptp_code = if let Some(src) = state + .metadata + .get("eprover_source") + .and_then(|v| v.as_str()) + { + src.to_string() + } else { + self.to_tptp(state)? + }; let mut child = Command::new(&self.config.executable) .arg("--auto") diff --git a/src/rust/provers/hol4.rs b/src/rust/provers/hol4.rs index fb5c236..a401f53 100644 --- a/src/rust/provers/hol4.rs +++ b/src/rust/provers/hol4.rs @@ -2149,7 +2149,16 @@ impl ProverBackend for Hol4Backend { .await .context("Failed to read HOL4 file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + state.metadata.insert( + "hol4_source".to_string(), + serde_json::Value::String(content), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -2245,6 +2254,28 @@ impl ProverBackend for Hol4Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Previously `Ok(state.goals.is_empty())` — fake success without + // ever invoking HOL4. When parse_file stashed the path, feed the + // file to HOL4 via Holmake (the standard batch build driver). 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let p = std::path::Path::new(path); + let mut cmd = Command::new(&self.config.executable); + if let Some(parent) = p.parent() { + cmd.current_dir(parent); + } + cmd.arg("--holdir") + .arg(p.file_name().map(std::path::Path::new).unwrap_or(p)); + let output = cmd.output().await.context("Failed to invoke HOL4")?; + let combined = format!( + "{}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let verified = output.status.success() + && !combined.to_lowercase().contains("exception") + && !combined.to_lowercase().contains("error"); + return Ok(verified); + } Ok(state.goals.is_empty()) } diff --git a/src/rust/provers/metamath.rs b/src/rust/provers/metamath.rs index 7c0d107..542241f 100644 --- a/src/rust/provers/metamath.rs +++ b/src/rust/provers/metamath.rs @@ -287,7 +287,12 @@ impl ProverBackend for MetamathBackend { let content = fs::read_to_string(&path) .await .with_context(|| format!("Failed to read file: {:?}", path))?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -397,13 +402,54 @@ impl ProverBackend for MetamathBackend { async fn verify_proof(&self, state: &ProofState) -> Result { info!("Verifying Metamath proof with {} goals", state.goals.len()); - if !state.goals.is_empty() { - debug!("Proof incomplete: {} goals remaining", state.goals.len()); - return Ok(false); + // Real verification path: shell out to the `metamath` CLI and run + // its `verify proof *` command over the original source. The + // previous stub — returning `Ok(state.goals.is_empty())` — never + // invoked the solver at all. 
+ let source_path = state + .metadata + .get("source_path") + .and_then(|v| v.as_str()) + .map(PathBuf::from); + + if let Some(path) = source_path { + use tokio::io::AsyncWriteExt; + use tokio::process::Command; + + let mut child = Command::new(&self.config.executable) + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .context("Failed to spawn metamath")?; + + if let Some(mut stdin) = child.stdin.take() { + let script = format!( + "read \"{}\"\nverify proof *\nexit\n", + path.display() + ); + stdin.write_all(script.as_bytes()).await?; + stdin.flush().await?; + drop(stdin); + } + + let output = child.wait_with_output().await?; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let combined = format!("{}{}", stdout, stderr); + + // metamath prints either "All proofs in the database were + // verified" on success, or "?Error" / "proof is incomplete" + // on failure. + let verified = combined.contains("All proofs in the database were verified") + && !combined.contains("?Error") + && !combined.to_lowercase().contains("proof is incomplete"); + return Ok(verified); } - // All goals satisfied - Ok(true) + // Fallback when no source path was stashed (synthetic ProofState): + // all we can say is "no goals left means OK". 
+ Ok(state.goals.is_empty()) } async fn export(&self, state: &ProofState) -> Result { diff --git a/src/rust/provers/pvs.rs b/src/rust/provers/pvs.rs index 331bf92..6b25fa1 100644 --- a/src/rust/provers/pvs.rs +++ b/src/rust/provers/pvs.rs @@ -2692,7 +2692,16 @@ impl ProverBackend for PVSBackend { .await .context("Failed to read PVS file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + state.metadata.insert( + "pvs_source".to_string(), + serde_json::Value::String(content), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { @@ -2824,6 +2833,29 @@ impl ProverBackend for PVSBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // The previous impl was a hollow stub — `Ok(state.goals.is_empty())` + // would happily report SUCCESS on a file that never went near PVS. + // Shell out to PVS in batch mode against the original source when + // it was stashed by parse_file; fall through to the legacy "empty + // goals == ok" shortcut only for synthetic ProofStates. 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = Command::new(&self.config.executable) + .arg("-batch") + .arg("-typecheck") + .arg(path) + .output() + .await + .context("Failed to invoke PVS")?; + let combined = format!( + "{}{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + let verified = output.status.success() + && !combined.to_lowercase().contains("error") + && !combined.to_lowercase().contains("typecheck failed"); + return Ok(verified); + } Ok(state.goals.is_empty()) } diff --git a/src/rust/provers/spass.rs b/src/rust/provers/spass.rs index 61e318d..33755b9 100644 --- a/src/rust/provers/spass.rs +++ b/src/rust/provers/spass.rs @@ -18,7 +18,6 @@ use anyhow::{Context, Result}; use async_trait::async_trait; use std::path::PathBuf; use std::process::Stdio; -use tokio::io::AsyncWriteExt; use tokio::process::Command; use super::{ProverBackend, ProverConfig, ProverKind}; @@ -105,14 +104,23 @@ impl ProverBackend for SPASSBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "spass_source".to_string(), + serde_json::Value::String(content.to_string()), + ); let mut in_axioms = false; let mut in_conjectures = false; @@ -157,28 +165,47 @@ impl ProverBackend for SPASSBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let dfg_code = self.to_dfg(state)?; - - let mut child = Command::new(&self.config.executable) - .arg("-TimeLimit") - .arg(format!("{}", 
self.config.timeout)) - .arg("-PGiven=0") - .arg("-PProblem=0") - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn() - .context("Failed to spawn SPASS process")?; - - if let Some(mut stdin) = child.stdin.take() { - stdin.write_all(dfg_code.as_bytes()).await?; - stdin.flush().await?; - drop(stdin); - } - + // Prefer the original DFG source when parse_file / parse_string + // stashed it. `to_dfg(state)` reconstructs DFG from the parsed + // conjecture list, which silently mangles anything beyond toy + // single-atom formulae. If a real path is available, pass that + // directly; otherwise materialise the stashed source (or the + // lossy reconstruction) to a temp file. + // + // SPASS does not accept the problem on stdin — observed behaviour is + // that the piped stream is treated as extra command-line flags + // (NOTE(review): confirm against the SPASS manual) — so we always + // pass a file path as the final positional argument. + let path_to_check: PathBuf = + if let Some(p) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + PathBuf::from(p) + } else { + let dfg_code = state + .metadata + .get("spass_source") + .and_then(|v| v.as_str()) + .map(ToOwned::to_owned) + .map(Ok) + .unwrap_or_else(|| self.to_dfg(state))?; + let tmp = std::env::temp_dir() + .join(format!("echidna_spass_{}.dfg", uuid::Uuid::new_v4())); + tokio::fs::write(&tmp, dfg_code).await?; + tmp + }; + + // Don't pass -PGiven=0 / -PProblem=0 here: with them set, the + // "SPASS beiseite: Proof found." banner that `parse_result` + // matches on was observed to be suppressed (NOTE(review): the manual + // documents these as output-verbosity flags — re-verify). + // SPASS flag syntax is `-Name=Value` (not `-Name Value`). Passing + // them separated makes SPASS parse the number as a filename and + // exit with "Error in opening file N for reading".
let output = tokio::time::timeout( tokio::time::Duration::from_secs(self.config.timeout + 5), - child.wait_with_output(), + Command::new(&self.config.executable) + .arg(format!("-TimeLimit={}", self.config.timeout)) + .arg(&path_to_check) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), ) .await .context("SPASS timed out")??; diff --git a/src/rust/provers/vampire.rs b/src/rust/provers/vampire.rs index 852f119..9fd03c5 100644 --- a/src/rust/provers/vampire.rs +++ b/src/rust/provers/vampire.rs @@ -102,14 +102,23 @@ impl ProverBackend for VampireBackend { } async fn parse_file(&self, path: PathBuf) -> Result { - let content = tokio::fs::read_to_string(path) + let content = tokio::fs::read_to_string(&path) .await .context("Failed to read proof file")?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { let mut state = ProofState::default(); + state.metadata.insert( + "vampire_source".to_string(), + serde_json::Value::String(content.to_string()), + ); for line in content.lines() { let line = line.trim(); @@ -143,7 +152,45 @@ impl ProverBackend for VampireBackend { } async fn verify_proof(&self, state: &ProofState) -> Result { - let tptp_code = self.to_tptp(state)?; + // Prefer the original TPTP source — `to_tptp(state)` round-trips + // through the generic Term IR and silently mangles non-trivial + // inputs. 
+ if let Some(path) = state.metadata.get("source_path").and_then(|v| v.as_str()) { + let output = tokio::time::timeout( + tokio::time::Duration::from_secs(self.config.timeout + 5), + Command::new(&self.config.executable) + .arg("--mode") + .arg("casc") + .arg("--input_syntax") + .arg("tptp") + .arg("--time_limit") + .arg(format!("{}", self.config.timeout)) + .arg("--proof") + .arg("off") + .arg("--statistics") + .arg("brief") + .arg(path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .output(), + ) + .await + .context("Vampire timed out")??; + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + return self + .parse_result(&stdout) + .or_else(|_| self.parse_result(&stderr)); + } + let tptp_code = if let Some(src) = state + .metadata + .get("vampire_source") + .and_then(|v| v.as_str()) + { + src.to_string() + } else { + self.to_tptp(state)? + }; let mut child = Command::new(&self.config.executable) .arg("--mode") diff --git a/src/rust/provers/z3.rs b/src/rust/provers/z3.rs index 6f01d69..f740902 100644 --- a/src/rust/provers/z3.rs +++ b/src/rust/provers/z3.rs @@ -236,11 +236,21 @@ impl ProverBackend for Z3Backend { .await .with_context(|| format!("Failed to read file: {:?}", path))?; - self.parse_string(&content).await + let mut state = self.parse_string(&content).await?; + state.metadata.insert( + "source_path".to_string(), + serde_json::Value::String(path.to_string_lossy().into_owned()), + ); + Ok(state) } async fn parse_string(&self, content: &str) -> Result { - self.parse_smt_file(content) + let mut state = self.parse_smt_file(content)?; + state.metadata.insert( + "z3_source".to_string(), + serde_json::Value::String(content.to_string()), + ); + Ok(state) } async fn apply_tactic(&self, state: &ProofState, tactic: &Tactic) -> Result { @@ -308,6 +318,15 @@ impl ProverBackend for Z3Backend { } async fn verify_proof(&self, state: &ProofState) -> Result { + // Prefer the original SMT-LIB source when it 
was stashed by + // parse_file / parse_string. Running Z3 on the raw user input + // avoids the lossy parse → Term IR → term_to_smt round-trip, + // which silently corrupts anything beyond trivial inputs. + if let Some(source) = state.metadata.get("z3_source").and_then(|v| v.as_str()) { + let result = self.execute_command(source).await?; + return Ok(matches!(result, SmtResult::Unsat)); + } + if state.goals.is_empty() { return Ok(true); } @@ -326,17 +345,17 @@ impl ProverBackend for Z3Backend { return Ok(false); } - // Build complete SMT-LIB query with variable declarations + // Fallback: reconstruct an SMT-LIB query from the parsed IR. + // Lossy for anything complex; only reached when no raw source + // was captured (e.g. synthetic ProofState built at runtime). let mut commands = String::new(); commands.push_str("(set-logic ALL)\n"); - // Include variable declarations from context for var in &state.context.variables { let ty_smt = self.term_to_smt(&var.ty); commands.push_str(&format!("(declare-const {} {})\n", var.name, ty_smt)); } - // Assert negation of each goal (if unsat, goal is valid) for goal in &state.goals { let smt_goal = self.term_to_smt(&goal.target); commands.push_str(&format!("(assert (not {}))\n", smt_goal));