diff --git a/CHANGELOG.md b/CHANGELOG.md index d2d236b..de1396f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,14 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). folds into the confidence score (CV ≥2% → Medium, ≥10% → Low), implementing the spec §12 "sample variance" factor. The deterministic recorded backend ignores `samples` and never fabricates a spread (`ExecutionBackend::is_deterministic`). +- **`cu-profiler bench` (turnkey real-CU).** A declarative bench-plan schema + (`cu_profiler_core::bench::BenchPlan`: instructions, program id, hex data, accounts) + with base58/hex validation, and a `bench` subcommand that validates the plan, + optionally builds the program (`--build` via `cargo build-sbf`), and — with + `--program-name` — measures real compute units by delegating to the Linux + `cu-profiler-bench` executor over `PATH`. The executor links the Solana stack and is + a runtime sibling, not a build dependency, so the main CLI stays Solana-free; when + it is absent, `bench` validates the plan and fails with the exact command to run. ## [0.1.2] - 2026-06-20 diff --git a/crates/cu-profiler-cli/src/args.rs b/crates/cu-profiler-cli/src/args.rs index 4c08f9e..3b4e614 100644 --- a/crates/cu-profiler-cli/src/args.rs +++ b/crates/cu-profiler-cli/src/args.rs @@ -48,6 +48,8 @@ pub enum Command { Import(ImportArgs), /// Post the Markdown report as a sticky pull-request comment. Comment(CommentArgs), + /// Turnkey real-CU path: validate a bench plan and measure via cu-profiler-bench. + Bench(BenchArgs), } /// Inputs shared by `run`, `ci` and `compare`. @@ -250,6 +252,30 @@ pub struct CommentArgs { pub dry_run: bool, } +/// `cu-profiler bench` — turnkey real-CU path. +/// +/// Validates a declarative bench plan and, with `--program-name`, measures real +/// compute units via the Linux `cu-profiler-bench` executor. +#[derive(Debug, Args)] +pub struct BenchArgs { + /// Bench fixture file (`[[instruction]]` declarations with accounts/data). 
+    #[arg(long, default_value = "bench.toml")]
+    pub fixtures: PathBuf,
+
+    /// Program name (the `.so` stem, loaded from `$SBF_OUT_DIR`). With it, `bench`
+    /// measures via the `cu-profiler-bench` executor; without it, validate only.
+    #[arg(long)]
+    pub program_name: Option<String>,
+
+    /// Build the program with `cargo build-sbf` before benchmarking.
+    #[arg(long)]
+    pub build: bool,
+
+    /// Directory to run `cargo build-sbf` in.
+    #[arg(long, default_value = ".")]
+    pub manifest_path: PathBuf,
+}
+
 /// `cu-profiler init`.
 #[derive(Debug, Args)]
 pub struct InitArgs {
diff --git a/crates/cu-profiler-cli/src/commands/bench.rs b/crates/cu-profiler-cli/src/commands/bench.rs
new file mode 100644
index 0000000..119a940
--- /dev/null
+++ b/crates/cu-profiler-cli/src/commands/bench.rs
@@ -0,0 +1,113 @@
+//! `cu-profiler bench` — turnkey real-CU path.
+//!
+//! `bench` validates a declarative [`BenchPlan`](cu_profiler_core::bench::BenchPlan),
+//! optionally builds the program with `cargo build-sbf`, then **delegates the real
+//! Mollusk measurement** to the Linux-only `cu-profiler-bench` executor, found over
+//! `PATH` (a runtime sibling, never a build dependency — so the main CLI keeps the
+//! Solana/Mollusk stack out and stays Windows-buildable).
+//!
+//! - With `--program-name`: run the executor and forward its result; if the executor
+//!   is not installed, fail with the exact command to run (no silent half-measure).
+//! - Without `--program-name`: validate the plan and summarise it (a lint/prepare run).
+
+use std::path::Path;
+use std::process::Command;
+
+use cu_profiler_core::bench::BenchPlan;
+use cu_profiler_core::{Error, Result};
+
+use crate::args::BenchArgs;
+use crate::commands::{MAX_LOG_BYTES, read_to_string_capped};
+use crate::exit::ExitCode;
+
+/// The Linux-only sibling binary that performs the real Mollusk measurement.
+const EXECUTOR: &str = "cu-profiler-bench";
+
+/// Execute the `bench` command.
+pub fn run(args: &BenchArgs, quiet: bool) -> Result<ExitCode> {
+    let text = read_to_string_capped(&args.fixtures, MAX_LOG_BYTES)?;
+    let plan = BenchPlan::from_toml(&text)?;
+
+    if args.build {
+        build_sbf(&args.manifest_path, quiet)?;
+    }
+
+    // With a program, measure for real via the executor; without one, validate only.
+    let Some(program_name) = &args.program_name else {
+        if !quiet {
+            summarise(&plan);
+        }
+        return Ok(ExitCode::Success);
+    };
+
+    match delegate(&args.fixtures, program_name) {
+        Some(code) => Ok(code),
+        None => Err(Error::Simulation(format!(
+            "plan is valid, but the `{EXECUTOR}` executor was not found on PATH, so no compute \
+             units were measured. It is Linux-only (built from the cu-profiler-mollusk crate, \
+             which links the Solana stack). Install it, then run:\n \
+             {EXECUTOR} --fixtures {} --program-name {program_name}",
+            args.fixtures.display()
+        ))),
+    }
+}
+
+/// Run `cargo build-sbf` in `dir` to compile the program to an `.so`.
+fn build_sbf(dir: &Path, quiet: bool) -> Result<()> {
+    if !quiet {
+        eprintln!(
+            "building program with `cargo build-sbf` in {}…",
+            dir.display()
+        );
+    }
+    let status = Command::new("cargo")
+        .arg("build-sbf")
+        .current_dir(dir)
+        .status()
+        .map_err(|e| {
+            Error::Simulation(format!(
+                "could not run `cargo build-sbf` (is the Solana SBF toolchain installed?): {e}"
+            ))
+        })?;
+    if !status.success() {
+        return Err(Error::Simulation(
+            "`cargo build-sbf` failed — see its output above".to_string(),
+        ));
+    }
+    Ok(())
+}
+
+/// Run the `cu-profiler-bench` executor, inheriting its stdout/stderr and returning a
+/// mapped exit code — or `None` when the executor is not on `PATH`.
+fn delegate(fixtures: &Path, program_name: &str) -> Option<ExitCode> {
+    let mut cmd = Command::new(EXECUTOR);
+    cmd.arg("--fixtures").arg(fixtures);
+    cmd.arg("--program-name").arg(program_name);
+    match cmd.status() {
+        Ok(status) if status.success() => Some(ExitCode::Success),
+        Ok(_) => Some(ExitCode::Simulation),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => None, // executor not installed
+        Err(e) => {
+            eprintln!("error: could not launch `{EXECUTOR}`: {e}");
+            Some(ExitCode::Simulation)
+        }
+    }
+}
+
+/// Validate-only output: print the parsed plan and how to measure it.
+fn summarise(plan: &BenchPlan) {
+    println!("bench plan OK: {} instruction(s)", plan.instructions.len());
+    for ix in &plan.instructions {
+        println!(
+            " - {} → program {} ({} account(s), {} data byte(s))",
+            ix.scenario,
+            ix.program_id,
+            ix.accounts.len(),
+            ix.data.len() / 2,
+        );
+    }
+    eprintln!(
+        "note: plan validated. Pass --program-name and have the `{EXECUTOR}` executor on PATH \
+         (Linux; from the cu-profiler-mollusk crate) to measure real compute units."
+    );
+}
diff --git a/crates/cu-profiler-cli/src/commands/mod.rs b/crates/cu-profiler-cli/src/commands/mod.rs
index 4a92f12..b0369a8 100644
--- a/crates/cu-profiler-cli/src/commands/mod.rs
+++ b/crates/cu-profiler-cli/src/commands/mod.rs
@@ -2,6 +2,7 @@
 //! `cu-profiler-core`, render with `cu-profiler-report`, choose an exit code.
 
mod baseline; +mod bench; mod ci; mod comment; mod compare; @@ -12,6 +13,7 @@ mod inspect; mod run; pub use baseline::{approve as baseline_approve, save as baseline_save}; +pub use bench::run as bench; pub use ci::run as ci; pub use comment::run as comment; pub use compare::run as compare; diff --git a/crates/cu-profiler-cli/src/main.rs b/crates/cu-profiler-cli/src/main.rs index 8516c49..b0c4012 100644 --- a/crates/cu-profiler-cli/src/main.rs +++ b/crates/cu-profiler-cli/src/main.rs @@ -40,6 +40,7 @@ fn dispatch(cli: &Cli) -> Result { Command::Inspect(args) => commands::inspect(args, quiet), Command::Import(args) => commands::import(args, quiet), Command::Comment(args) => commands::comment(args, quiet), + Command::Bench(args) => commands::bench(args, quiet), Command::Baseline(args) => match &args.command { BaselineCommand::Save(a) => commands::baseline_save(a, quiet), BaselineCommand::Approve(a) => commands::baseline_approve(a, quiet), diff --git a/crates/cu-profiler-cli/tests/cli.rs b/crates/cu-profiler-cli/tests/cli.rs index daafa0b..5d65463 100644 --- a/crates/cu-profiler-cli/tests/cli.rs +++ b/crates/cu-profiler-cli/tests/cli.rs @@ -260,6 +260,71 @@ fn import_file_without_logs_reports_error() { assert!(!dir.join(".cu/logs/empty.log").exists()); } +#[test] +fn bench_validates_a_plan_and_summarises() { + let dir = scratch_dir("bench-ok"); + let fixtures = dir.join("bench.toml"); + std::fs::write( + &fixtures, + "[[instruction]]\nscenario=\"swap\"\nprogram_id=\"11111111111111111111111111111111\"\ndata=\"01ab\"\n", + ) + .unwrap(); + + let out = run(&dir, &["bench", "--fixtures", fixtures.to_str().unwrap()]); + assert!(out.status.success(), "bench failed: {out:?}"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("bench plan OK: 1 instruction"), + "summary: {stdout}" + ); + assert!(stdout.contains("swap"), "scenario: {stdout}"); +} + +#[test] +fn bench_rejects_an_invalid_plan() { + let dir = scratch_dir("bench-bad"); + let fixtures 
= dir.join("bench.toml"); + // Non-base58 program id must be rejected with a non-zero exit. + std::fs::write( + &fixtures, + "[[instruction]]\nscenario=\"s\"\nprogram_id=\"not-valid-0OIl\"\n", + ) + .unwrap(); + + let out = run(&dir, &["bench", "--fixtures", fixtures.to_str().unwrap()]); + assert!(!out.status.success(), "expected invalid plan to fail"); + assert!(String::from_utf8_lossy(&out.stderr).contains("base58")); +} + +#[test] +fn bench_with_program_name_errors_clearly_without_executor() { + let dir = scratch_dir("bench-noexec"); + let fixtures = dir.join("bench.toml"); + std::fs::write( + &fixtures, + "[[instruction]]\nscenario=\"swap\"\nprogram_id=\"11111111111111111111111111111111\"\n", + ) + .unwrap(); + + // Asking to measure (--program-name) when the Linux `cu-profiler-bench` executor + // is not on PATH must fail clearly with the exact command to run — never silently + // pretend to have measured. + let out = run( + &dir, + &[ + "bench", + "--fixtures", + fixtures.to_str().unwrap(), + "--program-name", + "demo", + ], + ); + assert!(!out.status.success(), "should fail without the executor"); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!(stderr.contains("cu-profiler-bench"), "stderr: {stderr}"); + assert!(stderr.contains("not found"), "stderr: {stderr}"); +} + #[test] fn comment_dry_run_renders_sticky_body_from_config() { let dir = scratch_dir("comment-dry"); diff --git a/crates/cu-profiler-core/src/bench.rs b/crates/cu-profiler-core/src/bench.rs new file mode 100644 index 0000000..4585b0d --- /dev/null +++ b/crates/cu-profiler-core/src/bench.rs @@ -0,0 +1,229 @@ +//! Declarative fixtures for the turnkey real-CU `bench` path. +//! +//! A [`BenchPlan`] describes, as plain data, the instruction(s) to execute against +//! a compiled Solana program so a live backend (Mollusk) can measure real compute +//! units — no hand-written Rust harness required. This module owns only the +//! 
**schema, parsing and validation**; it pulls in no Solana crates and runs +//! everywhere the core does (including Windows). Converting a validated plan into +//! `solana-instruction`/`solana-account` types and executing it lives in the +//! Linux-only `cu-profiler-mollusk` integration crate. +//! +//! ```toml +//! # bench.toml +//! [[instruction]] +//! scenario = "swap_exact_in" +//! program_id = "SwapPRogram1111111111111111111111111111" +//! data = "01ab" # hex-encoded instruction data +//! +//! [[instruction.account]] +//! pubkey = "11111111111111111111111111111111" +//! signer = true +//! writable = true +//! lamports = 1000000 +//! ``` + +use serde::{Deserialize, Serialize}; + +use crate::error::{Error, Result}; + +/// A set of instruction fixtures to benchmark, parsed from a `bench.toml`. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct BenchPlan { + /// One entry per instruction to execute and measure. + #[serde(default, rename = "instruction")] + pub instructions: Vec, +} + +/// A single instruction to execute against the program under test. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct InstructionFixture { + /// The scenario name this instruction measures (keys it to a `[scenario.]`). + pub scenario: String, + /// The program's base58 address. + pub program_id: String, + /// Hex-encoded instruction data (empty string for a no-arg instruction). + #[serde(default)] + pub data: String, + /// Accounts passed to the instruction, in order. + #[serde(default, rename = "account")] + pub accounts: Vec, +} + +/// One account in an instruction's account list. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AccountFixture { + /// The account's base58 address. + pub pubkey: String, + /// Whether the account signs the transaction. 
+    #[serde(default)]
+    pub signer: bool,
+    /// Whether the instruction may write to the account.
+    #[serde(default)]
+    pub writable: bool,
+    /// Starting lamport balance.
+    #[serde(default)]
+    pub lamports: u64,
+    /// Owning program (base58), if the account should be pre-owned.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub owner: Option<String>,
+    /// Hex-encoded initial account data, if any.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub data: Option<String>,
+}
+
+impl BenchPlan {
+    /// Parse and validate a plan from TOML.
+    ///
+    /// # Errors
+    /// Returns [`Error::Config`] for malformed TOML, an unknown key, an empty plan or scenario,
+    /// a non-base58 program/account address, or non-hex instruction/account data.
+    pub fn from_toml(s: &str) -> Result<Self> {
+        let plan: BenchPlan = toml::from_str(s).map_err(|e| Error::Config(e.to_string()))?;
+        plan.validate()?;
+        Ok(plan)
+    }
+
+    /// Validate every fixture's addresses and encodings.
+    ///
+    /// # Errors
+    /// Returns [`Error::Config`] describing the first invalid field found.
+    pub fn validate(&self) -> Result<()> {
+        if self.instructions.is_empty() {
+            return Err(Error::Config(
+                "bench plan has no `[[instruction]]` entries".to_string(),
+            ));
+        }
+        for ix in &self.instructions {
+            ix.validate()?;
+        }
+        Ok(())
+    }
+}
+
+impl InstructionFixture {
+    fn validate(&self) -> Result<()> {
+        if self.scenario.is_empty() {
+            return Err(Error::Config(
+                "an instruction has an empty `scenario`".to_string(),
+            ));
+        }
+        let ctx = format!("instruction `{}`", self.scenario);
+        validate_base58(&self.program_id, &format!("{ctx}: program_id"))?;
+        validate_hex(&self.data, &format!("{ctx}: data"))?;
+        for acc in &self.accounts {
+            validate_base58(&acc.pubkey, &format!("{ctx}: account pubkey"))?;
+            if let Some(owner) = &acc.owner {
+                validate_base58(owner, &format!("{ctx}: account owner"))?;
+            }
+            if let Some(data) = &acc.data {
+                validate_hex(data, &format!("{ctx}: account data"))?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// The base58 alphabet Solana uses (Bitcoin alphabet: no `0`, `O`, `I`, `l`).
+const BASE58_ALPHABET: &[u8] = b"123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+
+/// Validate that `s` looks like a base58-encoded 32-byte Solana address.
+///
+/// This checks the alphabet and the length window a 32-byte value encodes to
+/// (32–44 characters); it does not decode (which would need a base58 dependency).
+fn validate_base58(s: &str, what: &str) -> Result<()> {
+    if !(32..=44).contains(&s.len()) {
+        return Err(Error::Config(format!(
+            "{what}: `{s}` is not a 32-byte base58 address (length {})",
+            s.len()
+        )));
+    }
+    // Scan chars, not bytes, so a non-ASCII offender is reported as written.
+    let is_base58 = |c: char| u8::try_from(c).is_ok_and(|b| BASE58_ALPHABET.contains(&b));
+    if let Some(bad) = s.chars().find(|&c| !is_base58(c)) {
+        let msg = format!("{what}: `{s}` contains a non-base58 character `{bad}`");
+        return Err(Error::Config(msg));
+    }
+    Ok(())
+}
+
+/// Validate that `s` is valid hex (even length, hex digits only). Empty is allowed.
+fn validate_hex(s: &str, what: &str) -> Result<()> {
+    if s.len() % 2 != 0 {
+        return Err(Error::Config(format!(
+            "{what}: hex string has an odd length ({})",
+            s.len()
+        )));
+    }
+    // Scan chars, not bytes, so a non-ASCII offender is reported as written.
+    if let Some(bad) = s.chars().find(|c| !c.is_ascii_hexdigit()) {
+        return Err(Error::Config(format!(
+            "{what}: `{s}` contains a non-hex character `{bad}`"
+        )));
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const SYS: &str = "11111111111111111111111111111111";
+
+    fn plan_toml() -> String {
+        format!(
+            "[[instruction]]\nscenario=\"swap\"\nprogram_id=\"{SYS}\"\ndata=\"01ab\"\n\
+             [[instruction.account]]\npubkey=\"{SYS}\"\nsigner=true\nwritable=true\nlamports=1000000\n"
+        )
+    }
+
+    #[test]
+    fn parses_and_validates_a_plan() {
+        let plan = BenchPlan::from_toml(&plan_toml()).unwrap();
+        assert_eq!(plan.instructions.len(), 1);
+        let ix = &plan.instructions[0];
+        assert_eq!(ix.scenario, "swap");
+        assert_eq!(ix.data, "01ab");
+        assert_eq!(ix.accounts.len(), 1);
+        assert!(ix.accounts[0].signer && ix.accounts[0].writable);
+    }
+
+    #[test]
+    fn empty_plan_is_rejected() {
+        assert!(BenchPlan::from_toml("").is_err());
+    }
+
+    #[test]
+    fn rejects_unknown_keys() {
+        let toml = format!("[[instruction]]\nscenario=\"s\"\nprogram_id=\"{SYS}\"\nbogus=1\n");
+        assert!(BenchPlan::from_toml(&toml).is_err());
+    }
+
+    #[test]
+    fn rejects_bad_base58_program_id() {
+        let toml = "[[instruction]]\nscenario=\"s\"\nprogram_id=\"not-base58-0OIl\"\n";
+        let err = BenchPlan::from_toml(toml).unwrap_err().to_string();
+        assert!(err.contains("base58"), "{err}");
+    }
+
+    #[test]
+    fn rejects_odd_and_nonhex_data() {
+        let odd = format!("[[instruction]]\nscenario=\"s\"\nprogram_id=\"{SYS}\"\ndata=\"abc\"\n");
+        assert!(
+            BenchPlan::from_toml(&odd)
+                .unwrap_err()
+                .to_string()
+                .contains("odd")
+        );
+        let nonhex =
+            format!("[[instruction]]\nscenario=\"s\"\nprogram_id=\"{SYS}\"\ndata=\"zz\"\n");
+        assert!(
+            BenchPlan::from_toml(&nonhex)
+                .unwrap_err()
+                .to_string()
+                .contains("non-hex")
+        );
+    }
+}
diff --git a/crates/cu-profiler-core/src/lib.rs b/crates/cu-profiler-core/src/lib.rs
index 8804273..5b2aaed 100644
--- a/crates/cu-profiler-core/src/lib.rs
+++ b/crates/cu-profiler-core/src/lib.rs
@@ -41,6 +41,7 @@
 pub mod anchor;
 pub mod backend;
 pub mod baseline;
+pub mod bench;
 pub mod budget;
 pub mod confidence;
 pub mod config;
diff --git a/docs/reference.md b/docs/reference.md
index 5c14fb5..de7fea4 100644
--- a/docs/reference.md
+++ b/docs/reference.md
@@ -618,6 +618,41 @@ Diagnoses a single scenario.
 
 Reads an existing report and shows analysis without re-simulating.
 
+### `cu-profiler bench`
+
+Turnkey real-CU path. Reads a declarative bench plan (`bench.toml`), validates it,
+optionally builds the program, and — with `--program-name` — measures real compute
+units by delegating to the Linux `cu-profiler-bench` executor (see below). Flags:
+
+```
+--fixtures <path>        bench plan file (default bench.toml)
+--program-name <name>    .so stem (loaded from $SBF_OUT_DIR); with it, bench measures
+--build                  run `cargo build-sbf` first
+--manifest-path <dir>    directory to build in (default .)
+```
+
+A `bench.toml` declares the instruction(s) to execute as data:
+
+```toml
+[[instruction]]
+scenario = "swap_exact_in"
+program_id = "SwapPRogram1111111111111111111111111111"
+data = "01ab" # hex instruction data
+
+  [[instruction.account]]
+  pubkey = "11111111111111111111111111111111"
+  signer = true
+  writable = true
+  lamports = 1000000
+```
+
+Real measurement is performed by `cu-profiler-bench`, a Linux-only executor built
+from the `cu-profiler-mollusk` crate (it links the Solana/Mollusk stack, which is not
+buildable on every host). `cu-profiler bench --program-name <name>` runs it over `PATH` —
+a runtime sibling, not a build dependency, so the main CLI stays Solana-free. Without
+the executor installed, `bench` validates the plan and fails with the exact command to
+run; without `--program-name`, it validates the plan only.
+
 ---
 
 ## 16. Exit Codes