diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a0cf6b7..778dfe2 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -12,6 +12,14 @@ on:
         description: "Dry run (skip release creation)"
         type: boolean
         default: true
+      profile:
+        description: "Cargo profile for release binaries"
+        type: choice
+        default: "profiling"
+        options:
+          - profiling
+          - maxperf
+          - release
 
 permissions:
   contents: read
@@ -102,13 +110,41 @@ jobs:
       - name: Install cross main
         run: cargo install cross --git https://github.com/cross-rs/cross
 
+      - name: Resolve build profile
+        id: profile
+        # Hand workflow expressions to the script via env vars rather than
+        # inlining them into the shell text, so their values are always
+        # treated as data and never parsed as shell syntax.
+        env:
+          EVENT_NAME: ${{ github.event_name }}
+          INPUT_PROFILE: ${{ inputs.profile }}
+        run: |
+          # Tag push -> profiling (matches Dockerfile + EC2 deploy defaults;
+          # see PR rationale: maxperf regresses ERC20 long tail).
+          # workflow_dispatch -> user-selected profile (falls back to profiling
+          # if the input is empty; `maxperf` for eth-heavy reference builds).
+          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
+            PROFILE="${INPUT_PROFILE:-profiling}"
+          else
+            PROFILE="profiling"
+          fi
+          # Cargo's target dir is "debug" for dev, otherwise the profile name.
+          if [[ "$PROFILE" == "dev" ]]; then
+            PROFILE_DIR="debug"
+          else
+            PROFILE_DIR="$PROFILE"
+          fi
+          echo "profile=$PROFILE" >> "$GITHUB_OUTPUT"
+          echo "profile_dir=$PROFILE_DIR" >> "$GITHUB_OUTPUT"
+          echo "Resolved profile: $PROFILE (target dir: $PROFILE_DIR)"
+
       - name: Build binary
-        run: make build-${{ matrix.target }}
+        run: make build-${{ matrix.target }} PROFILE=${{ steps.profile.outputs.profile }}
 
       - name: Package binary
         run: |
           mkdir -p dist
-          cp target/${{ matrix.target }}/release/morph-reth dist/
+          cp target/${{ matrix.target }}/${{ steps.profile.outputs.profile_dir }}/morph-reth dist/
           cd dist
           tar czf ../${{ matrix.archive }}.tar.gz morph-reth
           cd ..
diff --git a/Cargo.toml b/Cargo.toml
index 901cfcb..7f0ee7f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,48 @@ unnameable-types = "warn"
 [workspace.lints.rustdoc]
 all = "warn"
 
+# Speed up compilation time for dev builds by reducing emitted debug info.
+# NOTE: Debuggers may provide less useful information with this setting.
+# Comment out this section if you're using a debugger.
+[profile.dev]
+# https://davidlattimore.github.io/posts/2024/02/04/speeding-up-the-rust-edit-build-run-cycle.html
+debug = "line-tables-only"
+split-debuginfo = "unpacked"
+
+# Meant for testing - all optimizations, but with debug assertions and overflow checks.
+[profile.hivetests]
+inherits = "test"
+opt-level = 3
+lto = "thin"
+
+[profile.release]
+opt-level = 3
+lto = "thin"
+debug = "none"
+strip = "symbols"
+panic = "unwind"
+codegen-units = 16
+
+# Use the `--profile profiling` flag to keep line-table symbols in a release
+# build — suitable for production flame graphs.
+# e.g. `cargo build --profile profiling`
+[profile.profiling]
+inherits = "release"
+debug = "line-tables-only"
+strip = "none"
+incremental = true
+
+# Include debug info in benchmarks too.
+[profile.bench]
+inherits = "profiling"
+
+# Maximum runtime performance. Fat LTO + single codegen unit; use for
+# throughput-sensitive production binaries (expect noticeably slower link).
+[profile.maxperf]
+inherits = "release"
+lto = "fat"
+codegen-units = 1
+
 [workspace.dependencies]
 morph-chainspec = { path = "crates/chainspec", default-features = false }
 morph-consensus = { path = "crates/consensus", default-features = false }
diff --git a/Dockerfile b/Dockerfile
index 856f8bb..df0e8a9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,20 +15,40 @@ RUN cargo chef prepare --recipe-path recipe.json
 FROM chef AS builder
 COPY --from=planner /app/recipe.json recipe.json
 
-# Build profile, release by default
-ARG BUILD_PROFILE=release
+# Build profile. Defaults to `profiling` (thin LTO + line-table debug info).
+# Bench data on M4 Pro showed `maxperf` (fat LTO + cgu=1) hitting a hard
+# trade-off on ERC20 workloads: peak `eth_transfer` throughput +13% but
+# median ERC20 TPS -10% and import_ms long tail catastrophic (p99.9 90ms,
+# max 456ms, 0.36% blocks > 50ms vs profiling's 0%). `profiling` keeps
+# ~80% of maxperf's hot-path gain, no ERC20 regression, AND gives prod
+# binaries line-table symbols so incidents produce clean stack traces.
+# Override with `--build-arg BUILD_PROFILE=maxperf` for eth-heavy nodes
+# that don't care about long tail, or `release` for slim/stripped.
+ARG BUILD_PROFILE=profiling
 ENV BUILD_PROFILE=$BUILD_PROFILE
 
-# Extra Cargo flags
-ARG RUSTFLAGS=""
-ENV RUSTFLAGS="$RUSTFLAGS"
+# Architecture-conditional RUSTFLAGS:
+# - linux/amd64 (default): enable x86-64-v3 baseline (Haswell+ / Excavator+)
+#   plus pclmulqdq (carry-less multiply, used by keccak/GHASH but not auto-
+#   enabled by v3). Matches upstream reth release defaults. Pre-2013 Intel
+#   and pre-2015 AMD CPUs will SIGILL on these binaries — for those hosts pass
+#   `--build-arg RUSTFLAGS="-C target-cpu=x86-64"` (an empty value keeps v3).
+# - other platforms (linux/arm64, etc.): no architecture flag.
+ARG TARGETPLATFORM
+ARG RUSTFLAGS=
 
 # Build dependencies (cached layer)
-RUN cargo chef cook --profile $BUILD_PROFILE --recipe-path recipe.json
+RUN if [ -z "$RUSTFLAGS" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+        export RUSTFLAGS="-C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq"; \
+    fi && \
+    cargo chef cook --profile $BUILD_PROFILE --recipe-path recipe.json
 
 # Build the application
 COPY . .
-RUN cargo build --profile $BUILD_PROFILE --locked --bin morph-reth
+RUN if [ -z "$RUSTFLAGS" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
+        export RUSTFLAGS="-C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq"; \
+    fi && \
+    cargo build --profile $BUILD_PROFILE --locked --bin morph-reth
 
 # Copy binary to a fixed location (ARG not resolved in COPY)
 RUN cp /app/target/$BUILD_PROFILE/morph-reth /app/morph-reth
diff --git a/Makefile b/Makefile
index 7d6be40..a6ac5b6 100644
--- a/Makefile
+++ b/Makefile
@@ -40,15 +40,21 @@ install: ## Build and install the morph-reth binary under `$(CARGO_HOME)/bin`.
 
 ##@ Cross Build (requires Docker + cross: cargo install cross --git https://github.com/cross-rs/cross)
 
+# x86_64 release artifacts target the x86-64-v3 baseline (Haswell+ /
+# Excavator+), matching upstream reth. `+pclmulqdq` enables carry-less
+# multiply (used by keccak/GHASH) which v3 does not auto-imply. Pre-2013
+# Intel and pre-2015 AMD CPUs will SIGILL on these binaries.
+build-x86_64-unknown-linux-gnu: RUSTFLAGS_ARCH = -C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq
+
+# aarch64 needs larger jemalloc page size (64KB pages on some ARM systems)
+build-aarch64-unknown-linux-gnu: export JEMALLOC_SYS_WITH_LG_PAGE=16
+
 # Pattern rule: cross-build for any target, e.g. `make build-x86_64-unknown-linux-gnu`
 # See: https://github.com/cross-rs/cross/wiki/FAQ#undefined-reference-with-build-std
 build-%: ## Cross-build morph-reth for a specific target (e.g. build-x86_64-unknown-linux-gnu).
-	RUSTFLAGS="-C link-arg=-lgcc -Clink-arg=-static-libgcc" \
+	RUSTFLAGS="-C link-arg=-lgcc -Clink-arg=-static-libgcc $(RUSTFLAGS_ARCH)" \
 		cross build --locked --bin morph-reth --target $* --profile "$(PROFILE)"
 
-# aarch64 needs larger jemalloc page size (64KB pages on some ARM systems)
-build-aarch64-unknown-linux-gnu: export JEMALLOC_SYS_WITH_LG_PAGE=16
-
 # Create a `.tar.gz` containing the morph-reth binary for a specific target.
 define tarball_release_binary
 	cp $(CARGO_TARGET_DIR)/$(PROFILE)/morph-reth $(BIN_DIR)/morph-reth
diff --git a/MakefileEc2.mk b/MakefileEc2.mk
index 315eb1e..f423265 100644
--- a/MakefileEc2.mk
+++ b/MakefileEc2.mk
@@ -5,11 +5,29 @@ DIST_DIR = dist
 BINARY = morph-reth
 TARBALL = morph-reth.tar.gz
 CARGO_TARGET_DIR ?= target
-PROFILE ?= release
+# Production deploys go to EC2 via S3. Default to `profiling` (thin LTO +
+# line-table debug) — matches the Dockerfile default. Bench data showed
+# `maxperf` (fat LTO + cgu=1) regresses ERC20 median TPS -10% and explodes
+# the import_ms long tail (p99.9 90ms, max 456ms) while only winning eth-
+# transfer by 7%. `profiling` keeps most of the throughput gain, no ERC20
+# regression, and ships line-table symbols for incident diagnosis.
+PROFILE ?= profiling
+
+# Architecture-conditional RUSTFLAGS based on the build host's CPU. EC2
+# build hosts native-compile and upload to S3 → prod hosts pull. As long
+# as build host and prod host share architecture, the v3 baseline is
+# safe for any 2015+ x86_64 EC2 instance type (m5/m6i/c5/c6i/r5/r6i etc.).
+# Graviton ARM hosts skip the flag.
+ARCH := $(shell uname -m)
+ifeq ($(ARCH),x86_64)
+RUSTFLAGS_ARCH := -C target-cpu=x86-64-v3 -C target-feature=+pclmulqdq
+else
+RUSTFLAGS_ARCH :=
+endif
 
 define cargo_build_and_upload
 	if [ ! -d $(DIST_DIR) ]; then mkdir -p $(DIST_DIR); fi
-	CARGO_NET_GIT_FETCH_WITH_CLI=true cargo build --bin $(BINARY) --profile "$(PROFILE)" --target-dir "$(CARGO_TARGET_DIR)"
+	CARGO_NET_GIT_FETCH_WITH_CLI=true RUSTFLAGS="$(RUSTFLAGS_ARCH)" cargo build --bin $(BINARY) --profile "$(PROFILE)" --target-dir "$(CARGO_TARGET_DIR)"
 	cp "$(CARGO_TARGET_DIR)/$(PROFILE)/$(BINARY)" "$(DIST_DIR)/"
 	tar -czvf $(TARBALL) $(DIST_DIR)
 	aws s3 cp $(TARBALL) $(1)
diff --git a/crates/evm/src/block/curie.rs b/crates/evm/src/block/curie.rs
index 36c7358..c3092ac 100644
--- a/crates/evm/src/block/curie.rs
+++ b/crates/evm/src/block/curie.rs
@@ -126,7 +126,7 @@ mod tests {
             .storage
             .into_iter()
             .collect::<Vec<_>>();
-        storage.sort_by(|(a, _), (b, _)| a.cmp(b));
+        storage.sort_by_key(|(a, _)| *a);
 
         let expected_storage = [
             (GPO_L1_BLOB_BASE_FEE_SLOT, INITIAL_L1_BLOB_BASE_FEE),