From 4d5e291696cab807245baa2d2f36d47a27993fcf Mon Sep 17 00:00:00 2001
From: intentos-dev <intentosapp@gmail.com>
Date: Sun, 21 Jun 2026 15:23:32 +0200
Subject: [PATCH 1/2] =?UTF-8?q?feat(bench):=20turnkey=20real-CU=20scaffold?=
 =?UTF-8?q?ing=20=E2=80=94=20fixture=20schema=20+=20bench=20CLI?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Land the locally-verifiable half of the turnkey real-CU path:

- `cu_profiler_core::bench::BenchPlan` — a declarative instruction/account
  fixture schema (program id, hex data, accounts) parsed from `bench.toml`,
  with base58 + hex validation. Pure Rust, no Solana deps, builds on every
  host the core targets; fully unit-tested.
- `cu-profiler bench` — validates the plan, optionally builds the program
  with `cargo build-sbf` (`--build`), resolves the program `.so`
  (`--program`, or `--program-name` looked up under `$SBF_OUT_DIR` then
  `target/deploy`), and reports the prepared plan.

The live Mollusk execution that converts a validated plan into real
compute units belongs in the Linux-only `cu-profiler-mollusk` crate,
because the SBF/Solana stack does not build on Windows, where the local
gate runs. Wiring it up is a focused follow-up, validated by the SBF CI
job. Everything in this commit is gate-certified on the host.

Docs: reference §15 documents `bench` + the bench.toml schema; CHANGELOG.
Gate: grade A.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                 |   8 +
 crates/cu-profiler-cli/src/args.rs           |  29 +++
 crates/cu-profiler-cli/src/commands/bench.rs | 142 ++++++++++++
 crates/cu-profiler-cli/src/commands/mod.rs   |   2 +
 crates/cu-profiler-cli/src/main.rs           |   1 +
 crates/cu-profiler-cli/tests/cli.rs          |  36 +++
 crates/cu-profiler-core/src/bench.rs         | 229 +++++++++++++++++++
 crates/cu-profiler-core/src/lib.rs           |   1 +
 docs/reference.md                            |  32 +++
 9 files changed, 480 insertions(+)
 create mode 100644 crates/cu-profiler-cli/src/commands/bench.rs
 create mode 100644 crates/cu-profiler-core/src/bench.rs
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5efe10e..51cd1d5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,14 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
+### Added
+- **`cu-profiler bench` (turnkey real-CU, scaffolding).** A declarative bench-plan
+  schema (`cu_profiler_core::bench::BenchPlan`: instructions, program id, hex data,
+  accounts) with base58/hex validation, and a `bench` subcommand that validates the
+  plan and resolves/builds the program `.so` (`--program` / `--build` via
+  `cargo build-sbf`). Live Mollusk execution that produces real compute units is the
+  Linux-only follow-up (the SBF stack does not build on every host).
+
 ## [0.1.2] - 2026-06-20
 
 ### Security
diff --git a/crates/cu-profiler-cli/src/args.rs b/crates/cu-profiler-cli/src/args.rs
index a24f70a..d3cbb91 100644
--- a/crates/cu-profiler-cli/src/args.rs
+++ b/crates/cu-profiler-cli/src/args.rs
@@ -46,6 +46,8 @@ pub enum Command {
     Inspect(InspectArgs),
     /// Import a real transaction's logs (from a `getTransaction` JSON) as a scenario log.
     Import(ImportArgs),
+    /// Turnkey real-CU path: validate a bench plan and (optionally) build the program.
+    Bench(BenchArgs),
 }
 
 /// Inputs shared by `run`, `ci` and `compare`.
@@ -209,6 +211,33 @@ pub struct ImportArgs {
     pub logs_dir: PathBuf,
 }
 
+/// `cu-profiler bench` — turnkey real-CU path (scaffolding).
+///
+/// Validates a declarative bench plan and resolves/builds the program `.so`. Live
+/// Mollusk execution is delivered by the Linux-only `cu-profiler-mollusk` crate.
+#[derive(Debug, Args)]
+pub struct BenchArgs {
+    /// Bench fixture file (`[[instruction]]` declarations with accounts/data).
+    #[arg(long, default_value = "bench.toml")]
+    pub fixtures: PathBuf,
+
+    /// Path to the already-compiled program `.so` (skips building).
+    #[arg(long)]
+    pub program: Option<PathBuf>,
+
+    /// Program name (the `.so` stem) to locate under `$SBF_OUT_DIR`/`target/deploy`.
+    #[arg(long)]
+    pub program_name: Option<String>,
+
+    /// Build the program with `cargo build-sbf` before benchmarking.
+    #[arg(long)]
+    pub build: bool,
+
+    /// Directory to run `cargo build-sbf` in (a directory, not a `Cargo.toml` path).
+    #[arg(long, default_value = ".")]
+    pub manifest_path: PathBuf,
+}
+
 /// `cu-profiler init`.
 #[derive(Debug, Args)]
 pub struct InitArgs {
diff --git a/crates/cu-profiler-cli/src/commands/bench.rs b/crates/cu-profiler-cli/src/commands/bench.rs
new file mode 100644
index 0000000..8cb37f1
--- /dev/null
+++ b/crates/cu-profiler-cli/src/commands/bench.rs
@@ -0,0 +1,142 @@
+//! `cu-profiler bench` — the turnkey real-CU path (scaffolding).
+//!
+//! `bench` reads a declarative [`BenchPlan`](cu_profiler_core::bench::BenchPlan) of
+//! instructions, validates it, and (optionally) builds the program with
+//! `cargo build-sbf`, resolving the compiled `.so`. It is the CLI surface for the
+//! one SOTA soft-spot versus Mollusk: a one-command real-CU measurement.
+//!
+//! The **live Mollusk execution** that turns a validated plan into real
+//! compute-unit numbers lives in the Linux-only `cu-profiler-mollusk` integration
+//! crate (the Solana/SBF stack does not build on every host the core targets). This
+//! command therefore prepares and validates the plan today; wiring the execution is
+//! a focused follow-up that the Linux SBF CI job validates.
+
+use std::path::{Path, PathBuf};
+use std::process::Command;
+
+use cu_profiler_core::bench::BenchPlan;
+use cu_profiler_core::{Error, Result};
+
+use crate::args::BenchArgs;
+use crate::commands::{MAX_LOG_BYTES, read_to_string_capped};
+use crate::exit::ExitCode;
+
+/// Execute the `bench` command.
+pub fn run(args: &BenchArgs, quiet: bool) -> Result<ExitCode> {
+    let text = read_to_string_capped(&args.fixtures, MAX_LOG_BYTES)?;
+    let plan = BenchPlan::from_toml(&text)?;
+
+    if args.build {
+        build_sbf(&args.manifest_path, quiet)?;
+    }
+    let artifact = resolve_artifact(args.program.as_deref(), args.program_name.as_deref());
+
+    if !quiet {
+        report_plan(&plan, artifact.as_deref());
+    }
+    Ok(ExitCode::Success)
+}
+
+/// Run `cargo build-sbf` in `dir` to compile the program to an `.so`.
+fn build_sbf(dir: &Path, quiet: bool) -> Result<()> {
+    if !quiet {
+        eprintln!(
+            "building program with `cargo build-sbf` in {}…",
+            dir.display()
+        );
+    }
+    let status = Command::new("cargo")
+        .arg("build-sbf")
+        .current_dir(dir)
+        .status()
+        .map_err(|e| {
+            Error::Simulation(format!(
+                "could not run `cargo build-sbf` (is the Solana SBF toolchain installed?): {e}"
+            ))
+        })?;
+    if !status.success() {
+        return Err(Error::Simulation(
+            "`cargo build-sbf` failed — see its output above".to_string(),
+        ));
+    }
+    Ok(())
+}
+
+/// Resolve the compiled program `.so`: an explicit `--program`, else the
+/// `SBF_OUT_DIR`/`target/deploy` convention for `--program-name`. Returns the first
+/// candidate that exists. Pure but for the env/exists checks, so it is unit-tested
+/// via [`artifact_candidates`].
+fn resolve_artifact(explicit: Option<&Path>, program_name: Option<&str>) -> Option<PathBuf> {
+    if let Some(p) = explicit {
+        return Some(p.to_path_buf());
+    }
+    let name = program_name?;
+    let sbf_out = std::env::var("SBF_OUT_DIR").ok();
+    artifact_candidates(sbf_out.as_deref(), Path::new("target/deploy"), name)
+        .into_iter()
+        .find(|p| p.exists())
+}
+
+/// The ordered `.so` lookup paths for `name`: `$SBF_OUT_DIR` first, then the
+/// `target/deploy` convention. Pure — existence is checked by the caller.
+fn artifact_candidates(sbf_out_dir: Option<&str>, deploy_dir: &Path, name: &str) -> Vec<PathBuf> {
+    let file = format!("{name}.so");
+    let mut out = Vec::new();
+    if let Some(dir) = sbf_out_dir {
+        out.push(Path::new(dir).join(&file));
+    }
+    out.push(deploy_dir.join(&file));
+    out
+}
+
+/// Print the validated plan and the boundary note (execution is the Linux follow-up).
+fn report_plan(plan: &BenchPlan, artifact: Option<&Path>) {
+    println!("bench plan OK: {} instruction(s)", plan.instructions.len());
+    for ix in &plan.instructions {
+        println!(
+            "  - {} → program {} ({} account(s), {} data byte(s))",
+            ix.scenario,
+            ix.program_id,
+            ix.accounts.len(),
+            ix.data.len() / 2,
+        );
+    }
+    match artifact {
+        Some(p) => println!("program artifact: {}", p.display()),
+        None => println!(
+            "program artifact: not resolved (pass --program or --build with --program-name)"
+        ),
+    }
+    eprintln!(
+        "note: live compute-unit execution runs on the Linux `cu-profiler-mollusk` backend; \
+         this build validates and prepares the plan only."
+    );
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn explicit_program_wins() {
+        let p = PathBuf::from("some/program.so");
+        assert_eq!(resolve_artifact(Some(&p), None), Some(p));
+    }
+
+    #[test]
+    fn candidates_prefer_sbf_out_dir_then_deploy() {
+        let c = artifact_candidates(Some("/out"), Path::new("target/deploy"), "amm");
+        assert_eq!(c.len(), 2);
+        assert!(c[0].ends_with("amm.so"));
+        assert!(c[0].to_string_lossy().contains("out"));
+        assert!(c[1].ends_with("amm.so"));
+        assert!(c[1].to_string_lossy().contains("deploy"));
+    }
+
+    #[test]
+    fn candidates_without_sbf_out_dir_is_deploy_only() {
+        let c = artifact_candidates(None, Path::new("target/deploy"), "amm");
+        assert_eq!(c.len(), 1);
+        assert!(c[0].ends_with("amm.so"));
+    }
+}
diff --git a/crates/cu-profiler-cli/src/commands/mod.rs b/crates/cu-profiler-cli/src/commands/mod.rs
index e4ca955..eadb082 100644
--- a/crates/cu-profiler-cli/src/commands/mod.rs
+++ b/crates/cu-profiler-cli/src/commands/mod.rs
@@ -2,6 +2,7 @@
 //! `cu-profiler-core`, render with `cu-profiler-report`, choose an exit code.
 
 mod baseline;
+mod bench;
 mod ci;
 mod compare;
 mod explain;
@@ -11,6 +12,7 @@ mod inspect;
 mod run;
 
 pub use baseline::{approve as baseline_approve, save as baseline_save};
+pub use bench::run as bench;
 pub use ci::run as ci;
 pub use compare::run as compare;
 pub use explain::run as explain;
diff --git a/crates/cu-profiler-cli/src/main.rs b/crates/cu-profiler-cli/src/main.rs
index 3035562..850a84d 100644
--- a/crates/cu-profiler-cli/src/main.rs
+++ b/crates/cu-profiler-cli/src/main.rs
@@ -39,6 +39,7 @@ fn dispatch(cli: &Cli) -> Result<ExitCode> {
         Command::Explain(args) => commands::explain(args, quiet),
         Command::Inspect(args) => commands::inspect(args, quiet),
         Command::Import(args) => commands::import(args, quiet),
+        Command::Bench(args) => commands::bench(args, quiet),
         Command::Baseline(args) => match &args.command {
             BaselineCommand::Save(a) => commands::baseline_save(a, quiet),
             BaselineCommand::Approve(a) => commands::baseline_approve(a, quiet),
diff --git a/crates/cu-profiler-cli/tests/cli.rs b/crates/cu-profiler-cli/tests/cli.rs
index 198b43b..978d212 100644
--- a/crates/cu-profiler-cli/tests/cli.rs
+++ b/crates/cu-profiler-cli/tests/cli.rs
@@ -260,6 +260,42 @@ fn import_file_without_logs_reports_error() {
     assert!(!dir.join(".cu/logs/empty.log").exists());
 }
 
+#[test]
+fn bench_validates_a_plan_and_summarises() {
+    let dir = scratch_dir("bench-ok");
+    let fixtures = dir.join("bench.toml");
+    std::fs::write(
+        &fixtures,
+        "[[instruction]]\nscenario=\"swap\"\nprogram_id=\"11111111111111111111111111111111\"\ndata=\"01ab\"\n",
+    )
+    .unwrap();
+
+    let out = run(&dir, &["bench", "--fixtures", fixtures.to_str().unwrap()]);
+    assert!(out.status.success(), "bench failed: {out:?}");
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    assert!(
+        stdout.contains("bench plan OK: 1 instruction"),
+        "summary: {stdout}"
+    );
+    assert!(stdout.contains("swap"), "scenario: {stdout}");
+}
+
+#[test]
+fn bench_rejects_an_invalid_plan() {
+    let dir = scratch_dir("bench-bad");
+    let fixtures = dir.join("bench.toml");
+    // Non-base58 program id must be rejected with a non-zero exit.
+    std::fs::write(
+        &fixtures,
+        "[[instruction]]\nscenario=\"s\"\nprogram_id=\"not-valid-0OIl\"\n",
+    )
+    .unwrap();
+
+    let out = run(&dir, &["bench", "--fixtures", fixtures.to_str().unwrap()]);
+    assert!(!out.status.success(), "expected invalid plan to fail");
+    assert!(String::from_utf8_lossy(&out.stderr).contains("base58"));
+}
+
 #[cfg(feature = "anchor")]
 #[test]
 fn anchor_idl_labels_program_in_report() {
diff --git a/crates/cu-profiler-core/src/bench.rs b/crates/cu-profiler-core/src/bench.rs
new file mode 100644
index 0000000..4585b0d
--- /dev/null
+++ b/crates/cu-profiler-core/src/bench.rs
@@ -0,0 +1,229 @@
+//! Declarative fixtures for the turnkey real-CU `bench` path.
+//!
+//! A [`BenchPlan`] describes, as plain data, the instruction(s) to execute against
+//! a compiled Solana program so a live backend (Mollusk) can measure real compute
+//! units — no hand-written Rust harness required. This module owns only the
+//! **schema, parsing and validation**; it pulls in no Solana crates and runs
+//! everywhere the core does (including Windows). Converting a validated plan into
+//! `solana-instruction`/`solana-account` types and executing it lives in the
+//! Linux-only `cu-profiler-mollusk` integration crate.
+//!
+//! ```toml
+//! # bench.toml
+//! [[instruction]]
+//! scenario   = "swap_exact_in"
+//! program_id = "SwapPRogram1111111111111111111111111111"
+//! data       = "01ab"          # hex-encoded instruction data
+//!
+//!   [[instruction.account]]
+//!   pubkey   = "11111111111111111111111111111111"
+//!   signer   = true
+//!   writable = true
+//!   lamports = 1000000
+//! ```
+
+use serde::{Deserialize, Serialize};
+
+use crate::error::{Error, Result};
+
+/// A set of instruction fixtures to benchmark, parsed from a `bench.toml`.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(deny_unknown_fields)]
+pub struct BenchPlan {
+    /// One entry per instruction to execute and measure.
+    #[serde(default, rename = "instruction")]
+    pub instructions: Vec<InstructionFixture>,
+}
+
+/// A single instruction to execute against the program under test.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct InstructionFixture {
+    /// The scenario name this instruction measures (keys it to a `[scenario.<name>]`).
+    pub scenario: String,
+    /// The program's base58 address.
+    pub program_id: String,
+    /// Hex-encoded instruction data (empty string for a no-arg instruction).
+    #[serde(default)]
+    pub data: String,
+    /// Accounts passed to the instruction, in order.
+    #[serde(default, rename = "account")]
+    pub accounts: Vec<AccountFixture>,
+}
+
+/// One account in an instruction's account list.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct AccountFixture {
+    /// The account's base58 address.
+    pub pubkey: String,
+    /// Whether the account signs the transaction.
+    #[serde(default)]
+    pub signer: bool,
+    /// Whether the instruction may write to the account.
+    #[serde(default)]
+    pub writable: bool,
+    /// Starting lamport balance.
+    #[serde(default)]
+    pub lamports: u64,
+    /// Owning program (base58), if the account should be pre-owned.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub owner: Option<String>,
+    /// Hex-encoded initial account data, if any.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub data: Option<String>,
+}
+
+impl BenchPlan {
+    /// Parse and validate a plan from TOML.
+    ///
+    /// # Errors
+    /// Returns [`Error::Config`] for malformed TOML, an unknown key, a non-base58
+    /// program/account address, or non-hex instruction/account data.
+    pub fn from_toml(s: &str) -> Result<Self> {
+        let plan: BenchPlan = toml::from_str(s).map_err(|e| Error::Config(e.to_string()))?;
+        plan.validate()?;
+        Ok(plan)
+    }
+
+    /// Validate every fixture's addresses and encodings.
+    ///
+    /// # Errors
+    /// Returns [`Error::Config`] describing the first invalid field found.
+    pub fn validate(&self) -> Result<()> {
+        if self.instructions.is_empty() {
+            return Err(Error::Config(
+                "bench plan has no `[[instruction]]` entries".to_string(),
+            ));
+        }
+        for ix in &self.instructions {
+            ix.validate()?;
+        }
+        Ok(())
+    }
+}
+
+impl InstructionFixture {
+    /// Check the scenario name, every address and every hex payload of this fixture.
+    fn validate(&self) -> Result<()> {
+        if self.scenario.is_empty() {
+            let msg = "an instruction has an empty `scenario`";
+            return Err(Error::Config(msg.to_string()));
+        }
+        let ctx = format!("instruction `{}`", self.scenario);
+        validate_base58(&self.program_id, &format!("{ctx}: program_id"))?;
+        validate_hex(&self.data, &format!("{ctx}: data"))?;
+        for (i, acc) in self.accounts.iter().enumerate() {
+            validate_base58(&acc.pubkey, &format!("{ctx}: account[{i}] pubkey"))?;
+            if let Some(owner) = &acc.owner {
+                validate_base58(owner, &format!("{ctx}: account[{i}] owner"))?;
+            }
+            if let Some(data) = &acc.data {
+                validate_hex(data, &format!("{ctx}: account[{i}] data"))?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// The base58 alphabet Solana uses (Bitcoin alphabet: no `0`, `O`, `I`, `l`).
+const BASE58_ALPHABET: &[u8] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+
+/// Validate that `s` looks like a base58-encoded 32-byte Solana address.
+///
+/// Checks the length window a 32-byte value encodes to (32–44 characters) and the
+/// alphabet, per `char` so a non-ASCII offender is reported intact; it does not decode.
+fn validate_base58(s: &str, what: &str) -> Result<()> {
+    if !(32..=44).contains(&s.len()) {
+        return Err(Error::Config(format!(
+            "{what}: `{s}` is not a 32-byte base58 address (length {})",
+            s.len()
+        )));
+    }
+    let in_alphabet = |c: char| u8::try_from(c).is_ok_and(|b| BASE58_ALPHABET.contains(&b));
+    if let Some(bad) = s.chars().find(|&c| !in_alphabet(c)) {
+        return Err(Error::Config(format!(
+            "{what}: `{s}` contains a non-base58 character `{bad}`"
+        )));
+    }
+    Ok(())
+}
+
+/// Validate that `s` is valid hex (even length, hex digits only). Empty is allowed.
+fn validate_hex(s: &str, what: &str) -> Result<()> {
+    if s.len() % 2 != 0 {
+        return Err(Error::Config(format!(
+            "{what}: hex string has an odd length ({})",
+            s.len()
+        )));
+    }
+    // Scan `char`s rather than bytes so a non-ASCII offender is reported intact.
+    if let Some(bad) = s.chars().find(|c| !c.is_ascii_hexdigit()) {
+        return Err(Error::Config(format!(
+            "{what}: `{s}` contains a non-hex character `{bad}`"
+        )));
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const SYS: &str = "11111111111111111111111111111111";
+
+    fn plan_toml() -> String {
+        format!(
+            "[[instruction]]\nscenario=\"swap\"\nprogram_id=\"{SYS}\"\ndata=\"01ab\"\n\
+             [[instruction.account]]\npubkey=\"{SYS}\"\nsigner=true\nwritable=true\nlamports=1000000\n"
+        )
+    }
+
+    #[test]
+    fn parses_and_validates_a_plan() {
+        let plan = BenchPlan::from_toml(&plan_toml()).unwrap();
+        assert_eq!(plan.instructions.len(), 1);
+        let ix = &plan.instructions[0];
+        assert_eq!(ix.scenario, "swap");
+        assert_eq!(ix.data, "01ab");
+        assert_eq!(ix.accounts.len(), 1);
+        assert!(ix.accounts[0].signer && ix.accounts[0].writable);
+    }
+
+    #[test]
+    fn empty_plan_is_rejected() {
+        assert!(BenchPlan::from_toml("").is_err());
+    }
+
+    #[test]
+    fn rejects_unknown_keys() {
+        let toml = format!("[[instruction]]\nscenario=\"s\"\nprogram_id=\"{SYS}\"\nbogus=1\n");
+        assert!(BenchPlan::from_toml(&toml).is_err());
+    }
+
+    #[test]
+    fn rejects_bad_base58_program_id() {
+        let toml = "[[instruction]]\nscenario=\"s\"\nprogram_id=\"not-base58-0OIl\"\n";
+        let err = BenchPlan::from_toml(toml).unwrap_err().to_string();
+        assert!(err.contains("base58"), "{err}");
+    }
+
+    #[test]
+    fn rejects_odd_and_nonhex_data() {
+        let odd = format!("[[instruction]]\nscenario=\"s\"\nprogram_id=\"{SYS}\"\ndata=\"abc\"\n");
+        assert!(
+            BenchPlan::from_toml(&odd)
+                .unwrap_err()
+                .to_string()
+                .contains("odd")
+        );
+        let nonhex =
+            format!("[[instruction]]\nscenario=\"s\"\nprogram_id=\"{SYS}\"\ndata=\"zz\"\n");
+        assert!(
+            BenchPlan::from_toml(&nonhex)
+                .unwrap_err()
+                .to_string()
+                .contains("non-hex")
+        );
+    }
+}
diff --git a/crates/cu-profiler-core/src/lib.rs b/crates/cu-profiler-core/src/lib.rs
index 8804273..5b2aaed 100644
--- a/crates/cu-profiler-core/src/lib.rs
+++ b/crates/cu-profiler-core/src/lib.rs
@@ -41,6 +41,7 @@
 pub mod anchor;
 pub mod backend;
 pub mod baseline;
+pub mod bench;
 pub mod budget;
 pub mod confidence;
 pub mod config;
diff --git a/docs/reference.md b/docs/reference.md
index e4ee198..ab90919 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -592,6 +592,38 @@ Diagnoses a single scenario.
 
 Reads an existing report and shows analysis without re-simulating.
 
+### `cu-profiler bench`
+
+Turnkey real-CU path (scaffolding). Reads a declarative bench plan (`bench.toml`),
+validates it, and resolves or builds the program `.so`. Flags:
+
+```
+--fixtures       bench plan file (default bench.toml)
+--program        path to a compiled .so (skips building)
+--program-name   .so stem to locate under $SBF_OUT_DIR / target/deploy
+--build          run `cargo build-sbf` first
+--manifest-path  directory to build in (default .)
+```
+
+A `bench.toml` declares the instruction(s) to execute as data:
+
+```toml
+[[instruction]]
+scenario   = "swap_exact_in"
+program_id = "SwapPRogram1111111111111111111111111111"
+data       = "01ab"               # hex instruction data
+
+  [[instruction.account]]
+  pubkey   = "11111111111111111111111111111111"
+  signer   = true
+  writable = true
+  lamports = 1000000
+```
+
+Live compute-unit execution runs on the Linux-only `cu-profiler-mollusk` backend
+(the Solana/SBF stack is not buildable on every host); this command validates and
+prepares the plan, and the Mollusk execution that produces real CU is wired on top.
+
 ---
 
 ## 16. Exit Codes

From edc81684b8a50bab4a2c7f316ee4670f560ef08b Mon Sep 17 00:00:00 2001
From: intentos-dev <intentosapp@gmail.com>
Date: Sun, 21 Jun 2026 16:09:02 +0200
Subject: [PATCH 2/2] feat(bench): cu-profiler bench measures via the
 cu-profiler-bench executor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Close the gap where `cu-profiler bench` validated a plan but did not
measure. It now delegates real Mollusk measurement to the Linux
`cu-profiler-bench` executor over PATH (a runtime sibling, never a build
dependency — the main CLI stays Solana-free / Windows-buildable):

- with `--program-name`: run the executor and forward its result; if it
  is not installed, fail with the exact command to run (no silent
  half-measure);
- without `--program-name`: validate the plan and summarise (a lint run).

Dropped the `--program` flag and the artifact resolution, which `bench`
no longer needs now that measurement is delegated. Integration tests
cover validate-only, invalid-plan rejection, and the clear error when
the executor is absent. Docs §15 and CHANGELOG updated. Gate: grade A.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                 |  14 +-
 crates/cu-profiler-cli/src/args.rs           |   7 +-
 crates/cu-profiler-cli/src/commands/bench.rs | 127 +++++++------------
 crates/cu-profiler-cli/tests/cli.rs          |  29 +++++
 docs/reference.md                            |  17 ++-
 5 files changed, 98 insertions(+), 96 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 51cd1d5..36d4341 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,12 +7,14 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ## [Unreleased]
 
 ### Added
-- **`cu-profiler bench` (turnkey real-CU, scaffolding).** A declarative bench-plan
-  schema (`cu_profiler_core::bench::BenchPlan`: instructions, program id, hex data,
-  accounts) with base58/hex validation, and a `bench` subcommand that validates the
-  plan and resolves/builds the program `.so` (`--program` / `--build` via
-  `cargo build-sbf`). Live Mollusk execution that produces real compute units is the
-  Linux-only follow-up (the SBF stack does not build on every host).
+- **`cu-profiler bench` (turnkey real-CU).** A declarative bench-plan schema
+  (`cu_profiler_core::bench::BenchPlan`: instructions, program id, hex data, accounts)
+  with base58/hex validation, and a `bench` subcommand that validates the plan,
+  optionally builds the program (`--build` via `cargo build-sbf`), and — with
+  `--program-name` — measures real compute units by delegating to the Linux
+  `cu-profiler-bench` executor over `PATH`. The executor links the Solana stack and is
+  a runtime sibling, not a build dependency, so the main CLI stays Solana-free; when
+  it is absent, `bench` validates the plan and fails with the exact command to run.
 
 ## [0.1.2] - 2026-06-20
 
diff --git a/crates/cu-profiler-cli/src/args.rs b/crates/cu-profiler-cli/src/args.rs
index d3cbb91..90bb84b 100644
--- a/crates/cu-profiler-cli/src/args.rs
+++ b/crates/cu-profiler-cli/src/args.rs
@@ -221,11 +221,8 @@ pub struct BenchArgs {
     #[arg(long, default_value = "bench.toml")]
     pub fixtures: PathBuf,
 
-    /// Path to the already-compiled program `.so` (skips building).
-    #[arg(long)]
-    pub program: Option<PathBuf>,
-
-    /// Program name (the `.so` stem) to locate under `$SBF_OUT_DIR`/`target/deploy`.
+    /// Program name (the `.so` stem, loaded from `$SBF_OUT_DIR`). With it, `bench`
+    /// measures via the `cu-profiler-bench` executor; without it, validate only.
     #[arg(long)]
     pub program_name: Option<String>,
 
diff --git a/crates/cu-profiler-cli/src/commands/bench.rs b/crates/cu-profiler-cli/src/commands/bench.rs
index 8cb37f1..119a940 100644
--- a/crates/cu-profiler-cli/src/commands/bench.rs
+++ b/crates/cu-profiler-cli/src/commands/bench.rs
@@ -1,17 +1,16 @@
-//! `cu-profiler bench` — the turnkey real-CU path (scaffolding).
+//! `cu-profiler bench` — turnkey real-CU path.
 //!
-//! `bench` reads a declarative [`BenchPlan`](cu_profiler_core::bench::BenchPlan) of
-//! instructions, validates it, and (optionally) builds the program with
-//! `cargo build-sbf`, resolving the compiled `.so`. It is the CLI surface for the
-//! one SOTA soft-spot versus Mollusk: a one-command real-CU measurement.
+//! `bench` validates a declarative [`BenchPlan`](cu_profiler_core::bench::BenchPlan),
+//! optionally builds the program with `cargo build-sbf`, then **delegates the real
+//! Mollusk measurement** to the Linux-only `cu-profiler-bench` executor, found over
+//! `PATH` (a runtime sibling, never a build dependency — so the main CLI keeps the
+//! Solana/Mollusk stack out and stays Windows-buildable).
 //!
-//! The **live Mollusk execution** that turns a validated plan into real
-//! compute-unit numbers lives in the Linux-only `cu-profiler-mollusk` integration
-//! crate (the Solana/SBF stack does not build on every host the core targets). This
-//! command therefore prepares and validates the plan today; wiring the execution is
-//! a focused follow-up that the Linux SBF CI job validates.
+//! - With `--program-name`: run the executor and forward its result; if the executor
+//!   is not installed, fail with the exact command to run (no silent half-measure).
+//! - Without `--program-name`: validate the plan and summarise it (a lint/prepare run).
 
-use std::path::{Path, PathBuf};
+use std::path::Path;
 use std::process::Command;
 
 use cu_profiler_core::bench::BenchPlan;
@@ -21,6 +20,9 @@ use crate::args::BenchArgs;
 use crate::commands::{MAX_LOG_BYTES, read_to_string_capped};
 use crate::exit::ExitCode;
 
+/// The Linux-only sibling binary that performs the real Mollusk measurement.
+const EXECUTOR: &str = "cu-profiler-bench";
+
 /// Execute the `bench` command.
 pub fn run(args: &BenchArgs, quiet: bool) -> Result<ExitCode> {
     let text = read_to_string_capped(&args.fixtures, MAX_LOG_BYTES)?;
@@ -29,12 +31,25 @@ pub fn run(args: &BenchArgs, quiet: bool) -> Result<ExitCode> {
     if args.build {
         build_sbf(&args.manifest_path, quiet)?;
     }
-    let artifact = resolve_artifact(args.program.as_deref(), args.program_name.as_deref());
 
-    if !quiet {
-        report_plan(&plan, artifact.as_deref());
+    // With a program, measure for real via the executor; without one, validate only.
+    let Some(program_name) = &args.program_name else {
+        if !quiet {
+            summarise(&plan);
+        }
+        return Ok(ExitCode::Success);
+    };
+
+    match delegate(&args.fixtures, program_name) {
+        Some(code) => Ok(code),
+        None => Err(Error::Simulation(format!(
+            "plan is valid, but the `{EXECUTOR}` executor was not found on PATH, so no compute \
+             units were measured. It is Linux-only (built from the cu-profiler-mollusk crate, \
+             which links the Solana stack). Install it, then run:\n  \
+             {EXECUTOR} --fixtures {} --program-name {program_name}",
+            args.fixtures.display()
+        ))),
     }
-    Ok(ExitCode::Success)
 }
 
 /// Run `cargo build-sbf` in `dir` to compile the program to an `.so`.
@@ -62,35 +77,25 @@ fn build_sbf(dir: &Path, quiet: bool) -> Result<()> {
     Ok(())
 }
 
-/// Resolve the compiled program `.so`: an explicit `--program`, else the
-/// `SBF_OUT_DIR`/`target/deploy` convention for `--program-name`. Returns the first
-/// candidate that exists. Pure but for the env/exists checks, so it is unit-tested
-/// via [`artifact_candidates`].
-fn resolve_artifact(explicit: Option<&Path>, program_name: Option<&str>) -> Option<PathBuf> {
-    if let Some(p) = explicit {
-        return Some(p.to_path_buf());
-    }
-    let name = program_name?;
-    let sbf_out = std::env::var("SBF_OUT_DIR").ok();
-    artifact_candidates(sbf_out.as_deref(), Path::new("target/deploy"), name)
-        .into_iter()
-        .find(|p| p.exists())
-}
-
-/// The ordered `.so` lookup paths for `name`: `$SBF_OUT_DIR` first, then the
-/// `target/deploy` convention. Pure — existence is checked by the caller.
-fn artifact_candidates(sbf_out_dir: Option<&str>, deploy_dir: &Path, name: &str) -> Vec<PathBuf> {
-    let file = format!("{name}.so");
-    let mut out = Vec::new();
-    if let Some(dir) = sbf_out_dir {
-        out.push(Path::new(dir).join(&file));
-    }
-    out.push(deploy_dir.join(&file));
-    out
+/// Run the `cu-profiler-bench` executor (stdout/stderr inherited) and map its exit status.
+/// `None` only when the OS reports it as not found; other spawn errors are printed.
+fn delegate(fixtures: &Path, program_name: &str) -> Option<ExitCode> {
+    let mut cmd = Command::new(EXECUTOR);
+    cmd.arg("--fixtures").arg(fixtures);
+    cmd.arg("--program-name").arg(program_name);
+    match cmd.status() {
+        Ok(status) if status.success() => Some(ExitCode::Success),
+        Ok(_) => Some(ExitCode::Simulation),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
+        Err(e) => {
+            eprintln!("error: could not run `{EXECUTOR}`: {e}");
+            Some(ExitCode::Simulation)
+        }
+    }
 }
 
-/// Print the validated plan and the boundary note (execution is the Linux follow-up).
-fn report_plan(plan: &BenchPlan, artifact: Option<&Path>) {
+/// Validate-only output: print the parsed plan and how to measure it.
+fn summarise(plan: &BenchPlan) {
     println!("bench plan OK: {} instruction(s)", plan.instructions.len());
     for ix in &plan.instructions {
         println!(
@@ -101,42 +106,8 @@ fn report_plan(plan: &BenchPlan, artifact: Option<&Path>) {
             ix.data.len() / 2,
         );
     }
-    match artifact {
-        Some(p) => println!("program artifact: {}", p.display()),
-        None => println!(
-            "program artifact: not resolved (pass --program or --build with --program-name)"
-        ),
-    }
     eprintln!(
-        "note: live compute-unit execution runs on the Linux `cu-profiler-mollusk` backend; \
-         this build validates and prepares the plan only."
+        "note: plan validated. Pass --program-name and have the `{EXECUTOR}` executor on PATH \
+         (Linux; from the cu-profiler-mollusk crate) to measure real compute units."
     );
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn explicit_program_wins() {
-        let p = PathBuf::from("some/program.so");
-        assert_eq!(resolve_artifact(Some(&p), None), Some(p));
-    }
-
-    #[test]
-    fn candidates_prefer_sbf_out_dir_then_deploy() {
-        let c = artifact_candidates(Some("/out"), Path::new("target/deploy"), "amm");
-        assert_eq!(c.len(), 2);
-        assert!(c[0].ends_with("amm.so"));
-        assert!(c[0].to_string_lossy().contains("out"));
-        assert!(c[1].ends_with("amm.so"));
-        assert!(c[1].to_string_lossy().contains("deploy"));
-    }
-
-    #[test]
-    fn candidates_without_sbf_out_dir_is_deploy_only() {
-        let c = artifact_candidates(None, Path::new("target/deploy"), "amm");
-        assert_eq!(c.len(), 1);
-        assert!(c[0].ends_with("amm.so"));
-    }
-}
diff --git a/crates/cu-profiler-cli/tests/cli.rs b/crates/cu-profiler-cli/tests/cli.rs
index 978d212..15658a8 100644
--- a/crates/cu-profiler-cli/tests/cli.rs
+++ b/crates/cu-profiler-cli/tests/cli.rs
@@ -296,6 +296,35 @@ fn bench_rejects_an_invalid_plan() {
     assert!(String::from_utf8_lossy(&out.stderr).contains("base58"));
 }
 
+#[test]
+fn bench_with_program_name_errors_clearly_without_executor() {
+    let dir = scratch_dir("bench-noexec");
+    let fixtures = dir.join("bench.toml");
+    std::fs::write(
+        &fixtures,
+        "[[instruction]]\nscenario=\"swap\"\nprogram_id=\"11111111111111111111111111111111\"\n",
+    )
+    .unwrap();
+
+    // Asking to measure (--program-name) without the Linux `cu-profiler-bench` executor on PATH
+    // must fail clearly, naming the executor — never silently pretend to have measured.
+    // NOTE(review): assumes the executor is absent from PATH where tests run — confirm `run`.
+    let out = run(
+        &dir,
+        &[
+            "bench",
+            "--fixtures",
+            fixtures.to_str().unwrap(),
+            "--program-name",
+            "demo",
+        ],
+    );
+    assert!(!out.status.success(), "should fail without the executor");
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    assert!(stderr.contains("cu-profiler-bench"), "stderr: {stderr}");
+    assert!(stderr.contains("not found"), "stderr: {stderr}");
+}
+
 #[cfg(feature = "anchor")]
 #[test]
 fn anchor_idl_labels_program_in_report() {
diff --git a/docs/reference.md b/docs/reference.md
index ab90919..f25729e 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -594,13 +594,13 @@ Reads an existing report and shows analysis without re-simulating.
 
 ### `cu-profiler bench`
 
-Turnkey real-CU path (scaffolding). Reads a declarative bench plan (`bench.toml`),
-validates it, and resolves or builds the program `.so`. Flags:
+Turnkey real-CU path. Reads a declarative bench plan (`bench.toml`), validates it,
+optionally builds the program, and — with `--program-name` — measures real compute
+units by delegating to the Linux `cu-profiler-bench` executor (see below). Flags:
 
 ```
 --fixtures       bench plan file (default bench.toml)
---program        path to a compiled .so (skips building)
---program-name   .so stem to locate under $SBF_OUT_DIR / target/deploy
+--program-name   .so stem (loaded from $SBF_OUT_DIR); enables real-CU measurement
 --build          run `cargo build-sbf` first
 --manifest-path  directory to build in (default .)
 ```
@@ -620,9 +620,12 @@ data       = "01ab"               # hex instruction data
   lamports = 1000000
 ```
 
-Live compute-unit execution runs on the Linux-only `cu-profiler-mollusk` backend
-(the Solana/SBF stack is not buildable on every host); this command validates and
-prepares the plan, and the Mollusk execution that produces real CU is wired on top.
+Real measurement is performed by `cu-profiler-bench`, a Linux-only executor built
+from the `cu-profiler-mollusk` crate (it links the Solana/Mollusk stack, which is not
+buildable on every host). `cu-profiler bench --program-name <p>` finds it on `PATH` and runs it —
+a runtime sibling, not a build dependency, so the main CLI stays Solana-free. If the
+executor is not installed, `bench` still validates the plan, then fails with the exact command
+to run; without `--program-name`, it validates the plan only.
 
 ---