From 8f064df14df8fd6dd0c2c42683d1309e4baab0d6 Mon Sep 17 00:00:00 2001 From: panos-xyz Date: Wed, 22 Apr 2026 20:27:06 +0800 Subject: [PATCH 1/6] chore(evm): silence clippy unnecessary_sort_by in curie.rs test Clippy 1.95 reports `storage.sort_by(|(a, _), (b, _)| a.cmp(b))` as a `clippy::unnecessary_sort_by` (prefer `sort_by_key`) in the `apply_curie_hard_fork` test. The fix is the clippy-suggested rewrite. No behavioural change; only the test's sort comparator is reshaped. --- crates/evm/src/block/curie.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/evm/src/block/curie.rs b/crates/evm/src/block/curie.rs index 36c7358..c3092ac 100644 --- a/crates/evm/src/block/curie.rs +++ b/crates/evm/src/block/curie.rs @@ -126,7 +126,7 @@ mod tests { .storage .into_iter() .collect::<Vec<_>>(); - storage.sort_by(|(a, _), (b, _)| a.cmp(b)); + storage.sort_by_key(|(a, _)| *a); let expected_storage = [ (GPO_L1_BLOB_BASE_FEE_SLOT, INITIAL_L1_BLOB_BASE_FEE), From dec293ec41a4f4a59a205a54069d5952466c052a Mon Sep 17 00:00:00 2001 From: panos-xyz Date: Fri, 24 Apr 2026 10:02:45 +0800 Subject: [PATCH 2/6] chore(cargo): add workspace build profile gradient MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit morph-reth's root Cargo.toml previously defined no [profile.*] overrides, so release builds fell back to Cargo defaults (`lto = false`, i.e. only crate-local thin LTO) — strictly worse than upstream reth's own release. Define an explicit profile gradient: - profile.dev: line-tables-only debug + unpacked split-debuginfo. - profile.hivetests: opt-level=3 + lto=thin, inherits test (debug_assert on). - profile.release: opt-level=3, lto=thin, strip=symbols, codegen-units=16. - profile.profiling: release + line-table debug + strip=none + incremental — production binaries that still flamegraph cleanly. - profile.bench: inherits profiling. - profile.maxperf: release + lto=fat + codegen-units=1 for peak throughput (expect a noticeably longer link stage).
Local `make build` still defaults to the `release` profile, which now benefits from thin LTO without any user action. --- Cargo.toml | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 901cfcb..7f0ee7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,48 @@ unnameable-types = "warn" [workspace.lints.rustdoc] all = "warn" +# Speed up compilation time for dev builds by reducing emitted debug info. +# NOTE: Debuggers may provide less useful information with this setting. +# Comment out this section if you're using a debugger. +[profile.dev] +# https://davidlattimore.github.io/posts/2024/02/04/speeding-up-the-rust-edit-build-run-cycle.html +debug = "line-tables-only" +split-debuginfo = "unpacked" + +# Meant for testing - all optimizations, but with debug assertions and overflow checks. +[profile.hivetests] +inherits = "test" +opt-level = 3 +lto = "thin" + +[profile.release] +opt-level = 3 +lto = "thin" +debug = "none" +strip = "symbols" +panic = "unwind" +codegen-units = 16 + +# Use the `--profile profiling` flag to keep line-table symbols in a release +# build — suitable for production flame graphs. +# e.g. `cargo build --profile profiling` +[profile.profiling] +inherits = "release" +debug = "line-tables-only" +strip = "none" +incremental = true + +# Include debug info in benchmarks too. +[profile.bench] +inherits = "profiling" + +# Maximum runtime performance. Fat LTO + single codegen unit; use for +# throughput-sensitive production binaries (expect noticeably slower link).
+[profile.maxperf] +inherits = "release" +lto = "fat" +codegen-units = 1 + [workspace.dependencies] morph-chainspec = { path = "crates/chainspec", default-features = false } morph-consensus = { path = "crates/consensus", default-features = false } From 361866422f50044aa31beaa0f9c11ec76703adc1 Mon Sep 17 00:00:00 2001 From: panos-xyz Date: Fri, 24 Apr 2026 10:03:07 +0800 Subject: [PATCH 3/6] chore(deploy): default Docker and EC2 builds to maxperf Production-deploy paths (the Dockerfile that produces the container image and MakefileEc2.mk that uploads binaries to S3 for EC2 nodes) both flip their default Cargo profile from `release` to `maxperf`. That enables fat LTO + a single codegen unit so the shipped binary is 2-5% faster at runtime than the default release profile. Tradeoff: symbols are stripped, so flame graphs and symbolicated backtraces require a one-off `--build-arg BUILD_PROFILE=profiling` (or `PROFILE=profiling make ...`) rebuild. Image/link time also roughly doubles. Both are acceptable for an execution client where block- execution throughput is the hot metric and prod incidents are rare. Local `make build` remains on `release` so everyday iteration is unchanged. --- Dockerfile | 8 ++++++-- MakefileEc2.mk | 6 +++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 856f8bb..476c5dc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,8 +15,12 @@ RUN cargo chef prepare --recipe-path recipe.json FROM chef AS builder COPY --from=planner /app/recipe.json recipe.json -# Build profile, release by default -ARG BUILD_PROFILE=release +# Build profile. Defaults to `maxperf` (fat LTO + single codegen unit) for +# peak throughput in production containers. Link time is noticeably longer +# but amortized across every block executed. Override with +# `--build-arg BUILD_PROFILE=profiling` to keep line-table symbols for +# flame graphs, or `release` for the slimmer/stripped variant. 
+ARG BUILD_PROFILE=maxperf ENV BUILD_PROFILE=$BUILD_PROFILE # Extra Cargo flags diff --git a/MakefileEc2.mk b/MakefileEc2.mk index 315eb1e..b2584c7 100644 --- a/MakefileEc2.mk +++ b/MakefileEc2.mk @@ -5,7 +5,11 @@ DIST_DIR = dist BINARY = morph-reth TARBALL = morph-reth.tar.gz CARGO_TARGET_DIR ?= target -PROFILE ?= release +# Production deploys go to EC2 via S3. Default to `maxperf` (fat LTO + +# single codegen unit) for peak throughput on prod nodes — matches the +# Dockerfile default. Override with `PROFILE=profiling` to keep line-table +# symbols for flame graphs when diagnosing a prod incident. +PROFILE ?= maxperf define cargo_build_and_upload if [ ! -d $(DIST_DIR) ]; then mkdir -p $(DIST_DIR); fi From 68b1d762863d370de7b5b2d99e2b9543eae02aa1 Mon Sep 17 00:00:00 2001 From: panos-xyz Date: Fri, 24 Apr 2026 10:03:30 +0800 Subject: [PATCH 4/6] ci(release): parameterize cargo profile; default tag-push to maxperf Previously the release workflow hardcoded `target/<target>/release/` when packaging tarballs, which coupled it to the Makefile's `release` default and silently broke if either was overridden. - Add a `profile` workflow_dispatch input (default `profiling`, with `maxperf` and `release` as alternatives). Push-tag triggers now default to `maxperf` for peak throughput on public releases. - Resolve the profile in a dedicated step and thread it through both `make build-<target>` (via `PROFILE=<profile>`) and the subsequent `cp` path (using the computed `profile_dir`, which handles Cargo's dev->debug naming quirk).
--- .github/workflows/release.yml | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a0cf6b7..bfbc27d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,6 +12,14 @@ on: description: "Dry run (skip release creation)" type: boolean default: true + profile: + description: "Cargo profile for release binaries" + type: choice + default: "profiling" + options: + - profiling + - maxperf + - release permissions: contents: read @@ -102,13 +110,33 @@ jobs: - name: Install cross main run: cargo install cross --git https://github.com/cross-rs/cross + - name: Resolve build profile + id: profile + run: | + # Tag push -> maxperf (peak throughput for public releases). + # workflow_dispatch -> user-selected profile (defaults to profiling). + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + PROFILE="${{ inputs.profile }}" + else + PROFILE="maxperf" + fi + # Cargo's target dir is "debug" for dev, otherwise the profile name. + if [[ "$PROFILE" == "dev" ]]; then + PROFILE_DIR="debug" + else + PROFILE_DIR="$PROFILE" + fi + echo "profile=$PROFILE" >> "$GITHUB_OUTPUT" + echo "profile_dir=$PROFILE_DIR" >> "$GITHUB_OUTPUT" + echo "Resolved profile: $PROFILE (target dir: $PROFILE_DIR)" + - name: Build binary - run: make build-${{ matrix.target }} + run: make build-${{ matrix.target }} PROFILE=${{ steps.profile.outputs.profile }} - name: Package binary run: | mkdir -p dist - cp target/${{ matrix.target }}/release/morph-reth dist/ + cp target/${{ matrix.target }}/${{ steps.profile.outputs.profile_dir }}/morph-reth dist/ cd dist tar czf ../${{ matrix.archive }}.tar.gz morph-reth cd .. 
From 72e2a4fead6448588378a2e2ffb882f203b8ec1a Mon Sep 17 00:00:00 2001 From: panos-xyz Date: Fri, 24 Apr 2026 14:42:54 +0800 Subject: [PATCH 5/6] chore(build): enable x86-64-v3 baseline for x86_64 release artifacts Mirror upstream reth release defaults: x86_64 binaries target the x86-64-v3 baseline (Haswell+ / Excavator+, ~2013 onward) plus the pclmulqdq extension (carry-less multiply, used by keccak/GHASH but not auto-implied by v3). aarch64 is left untouched. - Makefile cross-build: target-specific RUSTFLAGS_ARCH so build-x86_64-* picks up the flags while build-aarch64-* stays clean. - Dockerfile: TARGETPLATFORM-conditional so multi-arch buildx builds get v3 only on linux/amd64. - MakefileEc2.mk: uname -m guard so EC2 native builds opt in on x86_64 and skip on Graviton ARM hosts. Pre-2013 Intel and pre-2015 AMD CPUs will SIGILL on these binaries; override the architecture flags for those hosts. Docker: pass a non-empty baseline such as `--build-arg RUSTFLAGS="-C target-cpu=x86-64"` (an empty RUSTFLAGS still selects the v3 default). Makefiles: pass `RUSTFLAGS_ARCH=` on the make command line (RUSTFLAGS itself is set inside the recipe, so an environment override is ignored). --- Dockerfile | 22 +++++++++++++++++----- Makefile | 14 ++++++++++---- MakefileEc2.mk | 14 +++++++++++++- 3 files changed, 40 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 476c5dc..42e74a8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,16 +23,28 @@ COPY --from=planner /app/recipe.json recipe.json ARG BUILD_PROFILE=maxperf ENV BUILD_PROFILE=$BUILD_PROFILE -# Extra Cargo flags -ARG RUSTFLAGS="" -ENV RUSTFLAGS="$RUSTFLAGS" +# Architecture-conditional RUSTFLAGS: +# - linux/amd64 (default): enable x86-64-v3 baseline (Haswell+ / Excavator+) +# plus pclmulqdq (carry-less multiply, used by keccak/GHASH but not auto- +# enabled by v3). Matches upstream reth release defaults. Pre-2013 Intel +# and pre-2015 AMD CPUs will SIGILL on these binaries — pass a non-empty +# override such as `--build-arg RUSTFLAGS="-C target-cpu=x86-64"` for those hosts (an empty value still gets the v3 default). +# - other platforms (linux/arm64, etc.): no architecture flag.
+ARG TARGETPLATFORM +ARG RUSTFLAGS= # Build dependencies (cached layer) -RUN cargo chef cook --profile $BUILD_PROFILE --recipe-path recipe.json +RUN if [ -z "$RUSTFLAGS" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ + export RUSTFLAGS="-C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq"; \ + fi && \ + cargo chef cook --profile $BUILD_PROFILE --recipe-path recipe.json # Build the application COPY . . -RUN cargo build --profile $BUILD_PROFILE --locked --bin morph-reth +RUN if [ -z "$RUSTFLAGS" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ + export RUSTFLAGS="-C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq"; \ + fi && \ + cargo build --profile $BUILD_PROFILE --locked --bin morph-reth # Copy binary to a fixed location (ARG not resolved in COPY) RUN cp /app/target/$BUILD_PROFILE/morph-reth /app/morph-reth diff --git a/Makefile b/Makefile index 7d6be40..a6ac5b6 100644 --- a/Makefile +++ b/Makefile @@ -40,15 +40,21 @@ install: ## Build and install the morph-reth binary under `$(CARGO_HOME)/bin`. ##@ Cross Build (requires Docker + cross: cargo install cross --git https://github.com/cross-rs/cross) +# x86_64 release artifacts target the x86-64-v3 baseline (Haswell+ / +# Excavator+), matching upstream reth. `+pclmulqdq` enables carry-less +# multiply (used by keccak/GHASH) which v3 does not auto-imply. Pre-2013 +# Intel and pre-2015 AMD CPUs will SIGILL on these binaries. +build-x86_64-unknown-linux-gnu: RUSTFLAGS_ARCH = -C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq + +# aarch64 needs larger jemalloc page size (64KB pages on some ARM systems) +build-aarch64-unknown-linux-gnu: export JEMALLOC_SYS_WITH_LG_PAGE=16 + # Pattern rule: cross-build for any target, e.g. `make build-x86_64-unknown-linux-gnu` # See: https://github.com/cross-rs/cross/wiki/FAQ#undefined-reference-with-build-std build-%: ## Cross-build morph-reth for a specific target (e.g. build-x86_64-unknown-linux-gnu). 
- RUSTFLAGS="-C link-arg=-lgcc -Clink-arg=-static-libgcc" \ + RUSTFLAGS="-C link-arg=-lgcc -Clink-arg=-static-libgcc $(RUSTFLAGS_ARCH)" \ cross build --locked --bin morph-reth --target $* --profile "$(PROFILE)" -# aarch64 needs larger jemalloc page size (64KB pages on some ARM systems) -build-aarch64-unknown-linux-gnu: export JEMALLOC_SYS_WITH_LG_PAGE=16 - # Create a `.tar.gz` containing the morph-reth binary for a specific target. define tarball_release_binary cp $(CARGO_TARGET_DIR)/$(PROFILE)/morph-reth $(BIN_DIR)/morph-reth diff --git a/MakefileEc2.mk b/MakefileEc2.mk index b2584c7..ae030bc 100644 --- a/MakefileEc2.mk +++ b/MakefileEc2.mk @@ -11,9 +11,21 @@ CARGO_TARGET_DIR ?= target # symbols for flame graphs when diagnosing a prod incident. PROFILE ?= maxperf +# Architecture-conditional RUSTFLAGS based on the build host's CPU. EC2 +# build hosts native-compile and upload to S3 → prod hosts pull. As long +# as build host and prod host share architecture, the v3 baseline is +# safe for any 2015+ x86_64 EC2 instance type (m5/m6i/c5/c6i/r5/r6i etc.). +# Graviton ARM hosts skip the flag. +ARCH := $(shell uname -m) +ifeq ($(ARCH),x86_64) +RUSTFLAGS_ARCH := -C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq +else +RUSTFLAGS_ARCH := +endif + define cargo_build_and_upload if [ ! 
-d $(DIST_DIR) ]; then mkdir -p $(DIST_DIR); fi - CARGO_NET_GIT_FETCH_WITH_CLI=true cargo build --bin $(BINARY) --profile "$(PROFILE)" --target-dir "$(CARGO_TARGET_DIR)" + CARGO_NET_GIT_FETCH_WITH_CLI=true RUSTFLAGS="$(RUSTFLAGS_ARCH)" cargo build --bin $(BINARY) --profile "$(PROFILE)" --target-dir "$(CARGO_TARGET_DIR)" cp "$(CARGO_TARGET_DIR)/$(PROFILE)/$(BINARY)" "$(DIST_DIR)/" tar -czvf $(TARBALL) $(DIST_DIR) aws s3 cp $(TARBALL) $(1) From 6f7b550688d62ed2281fbc781ff99cacb43bc775 Mon Sep 17 00:00:00 2001 From: panos-xyz Date: Fri, 24 Apr 2026 20:44:59 +0800 Subject: [PATCH 6/6] chore(deploy): switch prod default from maxperf to profiling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bench on M4 Pro across OLD/NEW(release)/MAXPERF/PROFILING binaries revealed maxperf hits a hard trade-off on ERC20 workloads: metric new(release) maxperf profiling eth median TPS 136,567 149,159 139,734 erc20 median TPS 102,667 94,517 (-10%)103,496 erc20 import p99 12.9ms 26.8ms 10.2ms erc20 import p99.9 23.8ms 90.1ms 21.3ms erc20 import max 59.6ms 456.7ms 39.4ms erc20 >50ms slow 0.017% 0.365% 0.000% Maxperf's +9% eth gain is not worth -10% ERC20 median + 18x slow-block rate vs profiling. Profiling (thin LTO + cgu=16) captures ~80% of the hot-path optimization with zero ERC20 long tail regression, AND ships line-table debug info — exactly what prod needs for incident triage. Switch all three production paths (Dockerfile / MakefileEc2.mk / tag-push release workflow) to default `profiling`. Maxperf remains selectable via override for eth-heavy reference builds. 
--- .github/workflows/release.yml | 8 +++++--- Dockerfile | 16 ++++++++++------ MakefileEc2.mk | 12 +++++++----- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bfbc27d..778dfe2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -113,12 +113,14 @@ jobs: - name: Resolve build profile id: profile run: | - # Tag push -> maxperf (peak throughput for public releases). - # workflow_dispatch -> user-selected profile (defaults to profiling). + # Tag push -> profiling (matches Dockerfile + EC2 deploy defaults; + # see PR rationale: maxperf regresses ERC20 long tail). + # workflow_dispatch -> user-selected profile (defaults to profiling, + # `maxperf` available for eth-heavy reference builds). if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then PROFILE="${{ inputs.profile }}" else - PROFILE="maxperf" + PROFILE="profiling" fi # Cargo's target dir is "debug" for dev, otherwise the profile name. if [[ "$PROFILE" == "dev" ]]; then diff --git a/Dockerfile b/Dockerfile index 42e74a8..df0e8a9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,12 +15,16 @@ RUN cargo chef prepare --recipe-path recipe.json FROM chef AS builder COPY --from=planner /app/recipe.json recipe.json -# Build profile. Defaults to `maxperf` (fat LTO + single codegen unit) for -# peak throughput in production containers. Link time is noticeably longer -# but amortized across every block executed. Override with -# `--build-arg BUILD_PROFILE=profiling` to keep line-table symbols for -# flame graphs, or `release` for the slimmer/stripped variant. -ARG BUILD_PROFILE=maxperf +# Build profile. Defaults to `profiling` (thin LTO + line-table debug info). 
+# Bench data on M4 Pro showed `maxperf` (fat LTO + cgu=1) hitting a hard +# trade-off on ERC20 workloads: peak `eth_transfer` throughput +13% but +# median ERC20 TPS -10% and import_ms long tail catastrophic (p99.9 90ms, +# max 456ms, 0.36% blocks > 50ms vs profiling's 0%). `profiling` keeps +# ~80% of maxperf's hot-path gain, no ERC20 regression, AND gives prod +# binaries line-table symbols so incidents produce clean stack traces. +# Override with `--build-arg BUILD_PROFILE=maxperf` for eth-heavy nodes +# that don't care about long tail, or `release` for slim/stripped. +ARG BUILD_PROFILE=profiling ENV BUILD_PROFILE=$BUILD_PROFILE # Architecture-conditional RUSTFLAGS: diff --git a/MakefileEc2.mk b/MakefileEc2.mk index ae030bc..f423265 100644 --- a/MakefileEc2.mk +++ b/MakefileEc2.mk @@ -5,11 +5,13 @@ DIST_DIR = dist BINARY = morph-reth TARBALL = morph-reth.tar.gz CARGO_TARGET_DIR ?= target -# Production deploys go to EC2 via S3. Default to `maxperf` (fat LTO + -# single codegen unit) for peak throughput on prod nodes — matches the -# Dockerfile default. Override with `PROFILE=profiling` to keep line-table -# symbols for flame graphs when diagnosing a prod incident. -PROFILE ?= maxperf +# Production deploys go to EC2 via S3. Default to `profiling` (thin LTO + +# line-table debug) — matches the Dockerfile default. Bench data showed +# `maxperf` (fat LTO + cgu=1) regresses ERC20 median TPS -10% and explodes +# the import_ms long tail (p99.9 90ms, max 456ms) while only winning eth- +# transfer by 7%. `profiling` keeps most of the throughput gain, no ERC20 +# regression, and ships line-table symbols for incident diagnosis. +PROFILE ?= profiling # Architecture-conditional RUSTFLAGS based on the build host's CPU. EC2 # build hosts native-compile and upload to S3 → prod hosts pull. As long