Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,15 @@ jobs:
run: cargo +nightly fmt --check

lint-clippy:
name: Lint (clippy)
name: Lint (clippy, ${{ matrix.features }})
needs: detect-changes
if: needs.detect-changes.outputs.run-full-ci == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
strategy:
fail-fast: false
matrix:
features: [full, bench]
env:
RUSTC_WRAPPER: sccache
SCCACHE_GHA_ENABLED: "true"
Expand All @@ -143,10 +147,10 @@ jobs:
- uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2
with:
cache-targets: "false"
shared-key: "ci"
shared-key: "ci-clippy-${{ matrix.features }}"
- uses: mozilla-actions/sccache-action@7d986dd989559c6ecdb630a3fd2557667be217ad # v0.0.9
- name: Clippy
run: cargo clippy --profile ci --workspace --features full -- -D warnings
run: cargo clippy --profile ci --workspace --features ${{ matrix.features }} -- -D warnings

build-tests:
name: Build Tests (${{ matrix.os }})
Expand Down Expand Up @@ -334,7 +338,7 @@ jobs:
strategy:
fail-fast: false
matrix:
bundle: [desktop, ide, server, chat, ml]
bundle: [desktop, ide, server, chat, ml, bench]
include:
- bundle: ml
allow_failure: true
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

### Added

- **`zeph-bench` crate scaffold with `BenchmarkChannel`** (`#2828`): new optional crate `crates/zeph-bench/` gated on the `bench` feature flag (not included in `full`). `BenchmarkChannel` implements the `Channel` trait for headless benchmark execution: `recv()` drains an injected prompt queue, `send()`/`send_chunk()`/`flush_chunks()` accumulate LLM responses into a capture buffer, `send_usage()` records token stats, `confirm()` auto-approves, `elicit()` returns `Declined`, `send_tool_output()` is a no-op (tool outputs excluded from benchmark metrics). `DatasetRegistry` lists the 5 supported datasets: LongMemEval, LOCOMO, FRAMES, tau-bench, GAIA.

- **`zeph bench` CLI subcommand** (`#2829`): top-level `bench` subcommand added to the `zeph` binary, gated on `#[cfg(feature = "bench")]`. Subcommands: `list` (print all datasets with cache status), `download --dataset <name>` (fetch and cache a dataset), `run --dataset <name> --output <path> [--scenario <id>] [--provider <name>] [--baseline] [--resume] [--no-deterministic]` (execute a benchmark), `show --results <path>` (pretty-print a results JSON file). An unknown dataset name or a missing cache causes the command to exit with code 1 and a diagnostic message.

- **Deterministic mode for benchmark runs** (`#2831`): bench runner applies `GenerationOverrides { temperature: Some(0.0), seed: Some(0) }` to the active LLM provider before constructing the agent, ensuring reproducible results across runs. Disabled with `--no-deterministic`.

- **Supervised bounded background task management** (`#2816`, `#2821`): introduced `BackgroundSupervisor` in `zeph-core` with per-class concurrency limits (Enrichment=4, Telemetry=8) and drop-on-overflow policy. Background tasks use an `InflightGuard` drop-guard to free concurrency slots immediately on completion. Metrics (`bg_inflight`, `bg_dropped`, `bg_completed`) added to `AgentMetrics`. `persist_message()` refactored into two phases: foreground commit (SQLite/Qdrant write, essential metrics) and background enrichment (summarization, graph extraction, persona extraction, trajectory extraction). Two fire-and-forget `tokio::spawn` sites in `corrections.rs` migrated to the supervisor. Foreground turns no longer await enrichment work; tail latency from post-persist processing is eliminated.

- **Bounded Candle inference worker** (`#2818`): replaced `Arc<Mutex<ModelWeights>>` with a
Expand Down
15 changes: 15 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ uuid = "1.23"
wiremock = "0.6.5"
zeroize = { version = "1", default-features = false }
zeph-a2a = { path = "crates/zeph-a2a", version = "0.18.5" }
zeph-bench = { path = "crates/zeph-bench", version = "0.18.6" }
zeph-acp = { path = "crates/zeph-acp", version = "0.18.5" }
zeph-db = { path = "crates/zeph-db", version = "0.18.5", default-features = false }
zeph-channels = { path = "crates/zeph-channels", version = "0.18.5" }
Expand Down Expand Up @@ -175,6 +176,7 @@ server = ["gateway", "a2a", "otel"]
chat = ["discord", "slack"]
ml = ["candle", "pdf"]
full = ["desktop", "ide", "server", "chat", "pdf", "scheduler", "classifiers"]
bench = ["dep:zeph-bench"]

# === Individual feature flags ===
a2a = ["dep:zeph-a2a", "zeph-a2a?/server", "zeph-a2a?/ibct"]
Expand Down Expand Up @@ -245,6 +247,7 @@ zeph-skills.workspace = true
zeph-subagent.workspace = true
zeph-tools.workspace = true
zeph-tui = { workspace = true, optional = true }
zeph-bench = { workspace = true, optional = true }

[dev-dependencies]
serial_test.workspace = true
Expand Down
26 changes: 26 additions & 0 deletions crates/zeph-bench/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Manifest for the `zeph-bench` crate.
# Every `*.workspace = true` key below inherits its value from the workspace
# root manifest; only `name`, `description`, and `publish` are set locally.
[package]
name = "zeph-bench"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
homepage.workspace = true
documentation.workspace = true
keywords.workspace = true
categories.workspace = true
description = "Benchmark harness for evaluating Zeph agent performance on standardized datasets"
# Explicitly marks the crate as publishable to a registry.
publish = true

# Dependency versions are inherited from the workspace dependency table;
# a `features` list here is added on top of what the workspace entry enables.
[dependencies]
# Requests clap's `derive` feature for this crate.
clap = { workspace = true, features = ["derive"] }
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
# Requests tokio's `time` feature for this crate.
tokio = { workspace = true, features = ["time"] }
zeph-config.workspace = true
zeph-core.workspace = true
zeph-llm.workspace = true

# Lint configuration is inherited from the workspace root.
[lints]
workspace = true
Loading
Loading