diff --git a/.github/README.md b/.github/README.md
new file mode 120000
index 000000000000..e5c578ba74b5
--- /dev/null
+++ b/.github/README.md
@@ -0,0 +1 @@
+../doc/benchcoin.md
\ No newline at end of file
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 000000000000..f58da7c5bce5
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,186 @@
+# Builds bitcoind at the PR base and head commits and benchmarks both on
+# self-hosted runners. publish-results.yml is triggered by this workflow's
+# name ("Benchmark"), so rename both together.
+name: Benchmark
+# Runs for pull requests that target master.
+on:
+ pull_request:
+ branches:
+ - master
+
+jobs:
+ build-binaries:
+   # Builds bitcoind at the PR base and head commits and uploads both
+   # binaries as the "bitcoind-binaries" artifact used by the benchmark jobs.
+   runs-on: [self-hosted, linux, x64]
+   env:
+     BASE_SHA: ${{ github.event.pull_request.base.sha }}
+   steps:
+     - name: Checkout repo
+       uses: actions/checkout@v4
+       with:
+         fetch-depth: 1
+
+     - name: Fetch base commit
+       run: |
+         # HEAD_SHA is exported for the following steps of this job.
+         echo "HEAD_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
+         # Use the job-level BASE_SHA instead of re-expanding the expression
+         # inside the script.
+         git fetch --depth=1 origin "$BASE_SHA"
+
+     - name: Build both binaries
+       run: |
+         nix develop --command python3 bench.py build \
+           -o "${{ runner.temp }}/binaries" \
+           "$BASE_SHA:base" "$HEAD_SHA:head"
+
+     - name: Upload binaries
+       uses: actions/upload-artifact@v4
+       with:
+         name: bitcoind-binaries
+         path: ${{ runner.temp }}/binaries/
+
+ uninstrumented:
+   # Benchmarks the base and head binaries without profiling, once per
+   # matrix entry. The matrix names must stay in sync with NETWORKS in
+   # publish-results.yml, which looks artifacts up by these names.
+   needs: build-binaries
+   strategy:
+     matrix:
+       include:
+         - name: mainnet-default-uninstrumented
+           timeout: 600
+           dbcache: 450
+         - name: mainnet-large-uninstrumented
+           timeout: 600
+           dbcache: 32000
+   runs-on: [self-hosted, linux, x64]
+   timeout-minutes: ${{ matrix.timeout }}
+   env:
+     ORIGINAL_DATADIR: /data/pruned-840k
+     BASE_SHA: ${{ github.event.pull_request.base.sha }}
+   steps:
+     - name: Checkout repo
+       uses: actions/checkout@v4
+       with:
+         fetch-depth: 1
+
+     - name: Download binaries
+       uses: actions/download-artifact@v4
+       with:
+         name: bitcoind-binaries
+         path: ${{ runner.temp }}/binaries
+
+     - name: Set binary permissions
+       run: |
+         # The binaries are made executable again after download.
+         chmod +x "${{ runner.temp }}/binaries/base/bitcoind"
+         chmod +x "${{ runner.temp }}/binaries/head/bitcoind"
+
+     - name: Fetch base commit
+       run: |
+         echo "HEAD_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
+         # Use the job-level BASE_SHA instead of re-expanding the expression
+         # inside the script.
+         git fetch --depth=1 origin "$BASE_SHA"
+
+     - name: Run benchmark
+       run: |
+         nix develop --command python3 bench.py --profile ci run \
+           --datadir "$ORIGINAL_DATADIR" \
+           --tmp-datadir "${{ runner.temp }}/datadir" \
+           --output-dir "${{ runner.temp }}/output" \
+           --dbcache ${{ matrix.dbcache }} \
+           "base:${{ runner.temp }}/binaries/base/bitcoind" \
+           "head:${{ runner.temp }}/binaries/head/bitcoind"
+
+     - name: Upload results
+       uses: actions/upload-artifact@v4
+       with:
+         name: result-${{ matrix.name }}
+         path: ${{ runner.temp }}/output/results.json
+
+     - name: Write context metadata
+       env:
+         GITHUB_CONTEXT: ${{ toJSON(github) }}
+         RUNNER_CONTEXT: ${{ toJSON(runner) }}
+       run: |
+         mkdir -p "${{ runner.temp }}/contexts"
+         # Drop the token from the context before it is stored as an artifact.
+         echo "$GITHUB_CONTEXT" | nix develop --command jq "del(.token)" > "${{ runner.temp }}/contexts/github.json"
+         echo "$RUNNER_CONTEXT" > "${{ runner.temp }}/contexts/runner.json"
+
+     - name: Upload context metadata
+       uses: actions/upload-artifact@v4
+       with:
+         name: run-metadata-${{ matrix.name }}
+         path: ${{ runner.temp }}/contexts/
+
+ instrumented:
+   # Benchmarks the base and head binaries with --instrumented and uploads
+   # the resulting plots and flamegraphs when they exist. The matrix names
+   # must stay in sync with NETWORKS in publish-results.yml.
+   needs: build-binaries
+   strategy:
+     matrix:
+       include:
+         - name: mainnet-default-instrumented
+           timeout: 600
+           dbcache: 450
+         - name: mainnet-large-instrumented
+           timeout: 600
+           dbcache: 32000
+   runs-on: [self-hosted, linux, x64]
+   timeout-minutes: ${{ matrix.timeout }}
+   env:
+     ORIGINAL_DATADIR: /data/pruned-840k
+     BASE_SHA: ${{ github.event.pull_request.base.sha }}
+   steps:
+     - name: Checkout repo
+       uses: actions/checkout@v4
+       with:
+         fetch-depth: 1
+
+     - name: Download binaries
+       uses: actions/download-artifact@v4
+       with:
+         name: bitcoind-binaries
+         path: ${{ runner.temp }}/binaries
+
+     - name: Set binary permissions
+       run: |
+         # The binaries are made executable again after download.
+         chmod +x "${{ runner.temp }}/binaries/base/bitcoind"
+         chmod +x "${{ runner.temp }}/binaries/head/bitcoind"
+
+     - name: Fetch base commit
+       run: |
+         echo "HEAD_SHA=$(git rev-parse HEAD)" >> "$GITHUB_ENV"
+         # Use the job-level BASE_SHA instead of re-expanding the expression
+         # inside the script.
+         git fetch --depth=1 origin "$BASE_SHA"
+
+     - name: Run instrumented benchmark
+       run: |
+         nix develop --command python3 bench.py --profile ci run \
+           --instrumented \
+           --datadir "$ORIGINAL_DATADIR" \
+           --tmp-datadir "${{ runner.temp }}/datadir" \
+           --output-dir "${{ runner.temp }}/output" \
+           --dbcache ${{ matrix.dbcache }} \
+           "base:${{ runner.temp }}/binaries/base/bitcoind" \
+           "head:${{ runner.temp }}/binaries/head/bitcoind"
+
+     - name: Upload results
+       uses: actions/upload-artifact@v4
+       with:
+         name: result-${{ matrix.name }}
+         path: ${{ runner.temp }}/output/results.json
+
+     - name: Upload plots
+       uses: actions/upload-artifact@v4
+       with:
+         name: pngs-${{ matrix.name }}
+         path: ${{ runner.temp }}/output/plots/*.png
+         if-no-files-found: ignore
+
+     - name: Upload flamegraphs
+       uses: actions/upload-artifact@v4
+       with:
+         name: flamegraph-${{ matrix.name }}
+         path: ${{ runner.temp }}/output/*-flamegraph.svg
+         if-no-files-found: ignore
+
+     - name: Write context metadata
+       env:
+         GITHUB_CONTEXT: ${{ toJSON(github) }}
+         RUNNER_CONTEXT: ${{ toJSON(runner) }}
+       run: |
+         mkdir -p "${{ runner.temp }}/contexts"
+         # Drop the token from the context before it is stored as an artifact.
+         echo "$GITHUB_CONTEXT" | nix develop --command jq "del(.token)" > "${{ runner.temp }}/contexts/github.json"
+         echo "$RUNNER_CONTEXT" > "${{ runner.temp }}/contexts/runner.json"
+
+     - name: Upload context metadata
+       uses: actions/upload-artifact@v4
+       with:
+         name: run-metadata-${{ matrix.name }}
+         path: ${{ runner.temp }}/contexts/
diff --git a/.github/workflows/publish-results.yml b/.github/workflows/publish-results.yml
new file mode 100644
index 000000000000..72b1bdd39527
--- /dev/null
+++ b/.github/workflows/publish-results.yml
@@ -0,0 +1,143 @@
+# Publishes the report for a finished "Benchmark" run to gh-pages and
+# comments on the originating pull request.
+name: Publish Results
+# Fires whenever a run of the workflow named "Benchmark" completes; the
+# build job below additionally requires that run to have succeeded.
+on:
+ workflow_run:
+ workflows: ["Benchmark"]
+ types: [completed]
+jobs:
+ build:
+   # Downloads the artifacts of a successful Benchmark run, renders the HTML
+   # report with bench.py from master and commits it to the gh-pages branch.
+   # The artifacts were produced by a pull_request run, so their contents
+   # are treated as untrusted input.
+   runs-on: ubuntu-latest
+   if: ${{ github.event.workflow_run.conclusion == 'success' }}
+   permissions:
+     actions: read
+     contents: write
+     checks: read
+   env:
+     # Must list the matrix names used by the jobs in benchmark.yml.
+     NETWORKS: "mainnet-default-instrumented,mainnet-large-instrumented,mainnet-default-uninstrumented,mainnet-large-uninstrumented"
+   outputs:
+     speedups: ${{ steps.generate.outputs.speedups }}
+     pr-number: ${{ steps.metadata.outputs.pr-number }}
+     result-url: ${{ steps.generate.outputs.result-url }}
+   steps:
+     - uses: actions/checkout@v4
+       with:
+         ref: gh-pages
+
+     - name: Checkout benchcoin tools
+       uses: actions/checkout@v4
+       with:
+         ref: master
+         path: benchcoin-tools
+
+     - name: Download artifacts
+       env:
+         GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+       run: |
+         gh run download ${{ github.event.workflow_run.id }} --repo ${{ github.repository }}
+
+     - name: Extract artifacts
+       run: |
+         for network in ${NETWORKS//,/ }; do
+           # Create network-specific directories with results
+           if [ -d "result-${network}" ]; then
+             mkdir -p "${network}-results"
+             mv "result-${network}/results.json" "${network}-results/"
+           fi
+
+           # Copy flamegraphs into network results directory
+           if [ -d "flamegraph-${network}" ]; then
+             cp -r "flamegraph-${network}"/* "${network}-results/" 2>/dev/null || true
+           fi
+
+           # Copy plots into network results directory
+           if [ -d "pngs-${network}" ]; then
+             mkdir -p "${network}-results/plots"
+             cp -r "pngs-${network}"/* "${network}-results/plots/" 2>/dev/null || true
+           fi
+
+           # Keep metadata separate for extraction
+           if [ -d "run-metadata-${network}" ]; then
+             mkdir -p "${network}-metadata"
+             mv "run-metadata-${network}"/* "${network}-metadata/"
+           fi
+         done
+
+     - name: Extract metadata
+       id: metadata
+       run: |
+         # Find PR number and run ID from any available metadata
+         PR_NUMBER=""
+         RUN_ID=""
+         for network in ${NETWORKS//,/ }; do
+           metadata_file="${network}-metadata/github.json"
+           if [ -f "${metadata_file}" ]; then
+             PR_NUMBER=$(jq -r '.event.pull_request.number // "main"' "${metadata_file}")
+             RUN_ID=$(jq -r '.run_id' "${metadata_file}")
+             break
+           fi
+         done
+
+         if [ -z "${PR_NUMBER}" ]; then
+           echo "::error::No run metadata found in the downloaded artifacts"
+           exit 1
+         fi
+
+         # The metadata file comes from an artifact written by PR-controlled
+         # code. Only accept the expected shapes before the values are used
+         # in paths, job outputs and the PR comment.
+         if [[ ! "${PR_NUMBER}" =~ ^([0-9]+|main)$ || ! "${RUN_ID}" =~ ^[0-9]+$ ]]; then
+           echo "::error::Unexpected PR number or run ID in ${metadata_file}"
+           exit 1
+         fi
+
+         echo "pr-number=${PR_NUMBER}" >> "$GITHUB_OUTPUT"
+         echo "run-id=${RUN_ID}" >> "$GITHUB_OUTPUT"
+         echo "Found metadata: PR=${PR_NUMBER}, Run=${RUN_ID}"
+
+     - name: Setup Python
+       uses: actions/setup-python@v5
+       with:
+         python-version: '3.12'
+
+     - name: Generate report
+       id: generate
+       env:
+         PR_NUMBER: ${{ steps.metadata.outputs.pr-number }}
+         RUN_ID: ${{ steps.metadata.outputs.run-id }}
+       run: |
+         cd benchcoin-tools
+
+         # Build network arguments
+         NETWORK_ARGS=""
+         for network in ${NETWORKS//,/ }; do
+           if [ -d "../${network}-results" ]; then
+             NETWORK_ARGS="${NETWORK_ARGS} --network ${network}:../${network}-results"
+           fi
+         done
+
+         # Generate report (NETWORK_ARGS is intentionally unquoted so that it
+         # splits into separate --network arguments)
+         python3 bench.py report \
+           ${NETWORK_ARGS} \
+           --pr-number "${PR_NUMBER}" \
+           --run-id "${RUN_ID}" \
+           --update-index \
+           "../results/pr-${PR_NUMBER}/${RUN_ID}"
+
+         # Read speedups from generated results.json
+         SPEEDUPS=$(jq -r '.speedups | to_entries | map(select(.key | contains("uninstrumented"))) | map("\(.key): \(.value)%") | join(", ")' "../results/pr-${PR_NUMBER}/${RUN_ID}/results.json")
+         echo "speedups=${SPEEDUPS}" >> "$GITHUB_OUTPUT"
+
+         RESULT_URL="https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }}/results/pr-${PR_NUMBER}/${RUN_ID}/index.html"
+         echo "result-url=${RESULT_URL}" >> "$GITHUB_OUTPUT"
+
+     - name: Upload Pages artifact
+       uses: actions/upload-pages-artifact@v3
+       with:
+         path: results
+
+     - name: Commit and push to gh-pages
+       run: |
+         git config --global user.name "github-actions[bot]"
+         git config --global user.email "github-actions[bot]@users.noreply.github.com"
+         git add results/ index.html
+         git commit -m "Update benchmark results from run ${{ github.event.workflow_run.id }}"
+         git push origin gh-pages
+
+ comment-pr:
+   # Posts a link to the published report and the measured speedups on the
+   # pull request that triggered the benchmark.
+   needs: build
+   runs-on: ubuntu-latest
+   permissions:
+     pull-requests: write
+     actions: read
+   steps:
+     - name: Comment on PR
+       if: ${{ needs.build.outputs.pr-number != 'main' }}
+       env:
+         GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+         # The build outputs are derived from artifacts uploaded by a
+         # pull_request run. They are passed through the environment so the
+         # shell treats them as data and never parses them as script text.
+         PR_NUMBER: ${{ needs.build.outputs.pr-number }}
+         RESULT_URL: ${{ needs.build.outputs.result-url }}
+         SPEEDUPS: ${{ needs.build.outputs.speedups }}
+         BENCHMARK_RUN_ID: ${{ github.event.workflow_run.id }}
+       run: |
+         gh pr comment "${PR_NUMBER}" \
+           --repo ${{ github.repository }} \
+           --body "📊 Benchmark results for this run (${BENCHMARK_RUN_ID}) will be available at: ${RESULT_URL} after the github pages \"build and deployment\" action has completed.
+         🚀 Speedups: ${SPEEDUPS}"
diff --git a/bench.py b/bench.py
new file mode 100755
index 000000000000..aba690a25ca2
--- /dev/null
+++ b/bench.py
@@ -0,0 +1,529 @@
+#!/usr/bin/env python3
+"""Benchcoin - Bitcoin Core benchmarking toolkit.
+
+A CLI for building, benchmarking, analyzing, and reporting on Bitcoin Core
+performance.
+
+Usage:
+ bench.py build COMMIT[:NAME]... Build bitcoind at one or more commits
+ bench.py run NAME:BINARY... Benchmark one or more binaries
+ bench.py analyze COMMIT LOGFILE Generate plots from debug.log
+ bench.py compare RESULTS... Compare benchmark results
+ bench.py report INPUT OUTPUT Generate HTML report
+
+Examples:
+ # Build two commits
+ bench.py build HEAD~1:before HEAD:after
+
+ # Benchmark built binaries
+ bench.py run before:./binaries/before/bitcoind after:./binaries/after/bitcoind --datadir /data
+
+ # Compare results
+ bench.py compare ./bench-output/results.json
+
+ # Generate HTML report
+ bench.py report ./bench-output ./report
+"""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import sys
+from pathlib import Path
+
+from bench.capabilities import detect_capabilities
+from bench.config import build_config
+
+# Configure the root logger once at import time with a "LEVEL: message" format.
+# Each subcommand raises the root level to DEBUG when --verbose is given.
+logging.basicConfig(
+ level=logging.INFO,
+ format="%(levelname)s: %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+def cmd_build(args: argparse.Namespace) -> int:
+ """Build bitcoind at one or more commits."""
+ from bench.build import BuildPhase
+
+ capabilities = detect_capabilities()
+ config = build_config(
+ cli_args={
+ "binaries_dir": args.output_dir,
+ "skip_existing": args.skip_existing,
+ "dry_run": args.dry_run,
+ "verbose": args.verbose,
+ },
+ config_file=Path(args.config) if args.config else None,
+ profile=args.profile,
+ )
+
+ if args.verbose:
+ logging.getLogger().setLevel(logging.DEBUG)
+
+ phase = BuildPhase(config, capabilities)
+
+ try:
+ result = phase.run(
+ args.commits,
+ output_dir=Path(args.output_dir) if args.output_dir else None,
+ )
+ logger.info(f"Built {len(result.binaries)} binary(ies):")
+ for binary in result.binaries:
+ logger.info(f" {binary.name}: {binary.path}")
+ return 0
+ except Exception as e:
+ logger.error(f"Build failed: {e}")
+ return 1
+
+
+def cmd_run(args: argparse.Namespace) -> int:
+ """Run benchmark on one or more binaries."""
+ from bench.benchmark import BenchmarkPhase, parse_binary_spec
+
+ capabilities = detect_capabilities()
+ config = build_config(
+ cli_args={
+ "datadir": args.datadir,
+ "tmp_datadir": args.tmp_datadir,
+ "output_dir": args.output_dir,
+ "stop_height": args.stop_height,
+ "dbcache": args.dbcache,
+ "runs": args.runs,
+ "connect": args.connect,
+ "chain": args.chain,
+ "instrumented": args.instrumented,
+ "no_cache_drop": args.no_cache_drop,
+ "dry_run": args.dry_run,
+ "verbose": args.verbose,
+ },
+ config_file=Path(args.config) if args.config else None,
+ profile=args.profile,
+ )
+
+ if args.verbose:
+ logging.getLogger().setLevel(logging.DEBUG)
+
+ errors = config.validate()
+ if errors:
+ for error in errors:
+ logger.error(error)
+ return 1
+
+ # Parse binary specs
+ try:
+ binaries = [parse_binary_spec(spec) for spec in args.binaries]
+ except ValueError as e:
+ logger.error(str(e))
+ return 1
+
+ # Validate binaries exist
+ for name, path in binaries:
+ if not path.exists():
+ logger.error(f"Binary not found: {path} ({name})")
+ return 1
+
+ phase = BenchmarkPhase(config, capabilities)
+ output_dir = Path(config.output_dir)
+
+ try:
+ result = phase.run(
+ binaries=binaries,
+ datadir=Path(config.datadir),
+ output_dir=output_dir,
+ )
+ logger.info(f"Results saved to: {result.results_file}")
+
+ # For instrumented runs, also generate plots
+ if config.instrumented:
+ from bench.analyze import AnalyzePhase
+
+ analyze_phase = AnalyzePhase()
+
+ for binary_result in result.binaries:
+ if binary_result.debug_log:
+ try:
+ analyze_phase.run(
+ commit=binary_result.name,
+ log_file=binary_result.debug_log,
+ output_dir=output_dir / "plots",
+ )
+ except Exception as e:
+ logger.warning(f"Analysis for {binary_result.name} failed: {e}")
+
+ return 0
+ except Exception as e:
+ logger.error(f"Benchmark failed: {e}")
+ if args.verbose:
+ import traceback
+
+ traceback.print_exc()
+ return 1
+
+
+def cmd_compare(args: argparse.Namespace) -> int:
+ """Compare benchmark results from multiple files."""
+ from bench.compare import ComparePhase
+
+ if args.verbose:
+ logging.getLogger().setLevel(logging.DEBUG)
+
+ results_files = [Path(f) for f in args.results_files]
+
+ # Validate files exist
+ for f in results_files:
+ if not f.exists():
+ logger.error(f"Results file not found: {f}")
+ return 1
+
+ phase = ComparePhase()
+
+ try:
+ result = phase.run(results_files, baseline=args.baseline)
+
+ # Output results
+ output_json = phase.to_json(result)
+
+ if args.output:
+ output_path = Path(args.output)
+ output_path.write_text(output_json)
+ logger.info(f"Comparison saved to: {output_path}")
+ else:
+ print(output_json)
+
+ return 0
+ except Exception as e:
+ logger.error(f"Comparison failed: {e}")
+ if args.verbose:
+ import traceback
+
+ traceback.print_exc()
+ return 1
+
+
+def cmd_analyze(args: argparse.Namespace) -> int:
+ """Generate plots from debug.log."""
+ from bench.analyze import AnalyzePhase
+
+ if args.verbose:
+ logging.getLogger().setLevel(logging.DEBUG)
+
+ log_file = Path(args.log_file)
+ output_dir = Path(args.output_dir)
+
+ if not log_file.exists():
+ logger.error(f"Log file not found: {log_file}")
+ return 1
+
+ phase = AnalyzePhase()
+
+ try:
+ result = phase.run(
+ commit=args.commit,
+ log_file=log_file,
+ output_dir=output_dir,
+ )
+ logger.info(f"Generated {len(result.plots)} plots in {result.output_dir}")
+ return 0
+ except Exception as e:
+ logger.error(f"Analysis failed: {e}")
+ if args.verbose:
+ import traceback
+
+ traceback.print_exc()
+ return 1
+
+
+def cmd_report(args: argparse.Namespace) -> int:
+ """Generate HTML report from benchmark results."""
+ from bench.report import ReportPhase
+
+ if args.verbose:
+ logging.getLogger().setLevel(logging.DEBUG)
+
+ output_dir = Path(args.output_dir)
+ phase = ReportPhase()
+
+ try:
+ # CI multi-network mode
+ if args.networks:
+ network_dirs = {}
+ for spec in args.networks:
+ if ":" not in spec:
+ logger.error(f"Invalid network spec '{spec}': must be NETWORK:PATH")
+ return 1
+ network, path = spec.split(":", 1)
+ network_dirs[network] = Path(path)
+
+ # Validate directories exist
+ for network, path in network_dirs.items():
+ if not path.exists():
+ logger.error(f"Network directory not found: {path} ({network})")
+ return 1
+
+ result = phase.run_multi_network(
+ network_dirs=network_dirs,
+ output_dir=output_dir,
+ title=args.title or "Benchmark Results",
+ pr_number=args.pr_number,
+ run_id=args.run_id,
+ )
+
+ # Update main index if we have a results directory
+ if args.update_index:
+ results_base = output_dir.parent.parent # Go up from pr-N/run-id
+ if results_base.exists():
+ phase.update_index(results_base, results_base.parent / "index.html")
+ else:
+ # Standard single-directory mode
+ input_dir = Path(args.input_dir)
+
+ if not input_dir.exists():
+ logger.error(f"Input directory not found: {input_dir}")
+ return 1
+
+ result = phase.run(
+ input_dir=input_dir,
+ output_dir=output_dir,
+ title=args.title or "Benchmark Results",
+ )
+
+ # Print speedups
+ if result.speedups:
+ logger.info("Speedups:")
+ for network, speedup in result.speedups.items():
+ sign = "+" if speedup > 0 else ""
+ logger.info(f" {network}: {sign}{speedup}%")
+
+ return 0
+ except Exception as e:
+ logger.error(f"Report generation failed: {e}")
+ if args.verbose:
+ import traceback
+
+ traceback.print_exc()
+ return 1
+
+
+def main() -> int:
+ """Main entry point.
+
+ Builds the argument parser, parses the command line and dispatches to the
+ handler that the chosen subcommand registered via ``set_defaults(func=...)``.
+
+ Returns:
+ The handler's return value, or 1 when no subcommand was given.
+ """
+ parser = argparse.ArgumentParser(
+ description="Benchcoin - Bitcoin Core benchmarking toolkit",
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ epilog=__doc__,
+ )
+
+ # Global options are defined on the top-level parser, so they are given
+ # before the subcommand (e.g. `bench.py --profile ci run ...`).
+ parser.add_argument(
+ "--config",
+ metavar="PATH",
+ help="Config file (default: bench.toml)",
+ )
+ parser.add_argument(
+ "--profile",
+ choices=["quick", "full", "ci"],
+ default="full",
+ help="Configuration profile (default: full)",
+ )
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="store_true",
+ help="Verbose output",
+ )
+ parser.add_argument(
+ "--dry-run",
+ action="store_true",
+ help="Show what would be done without executing",
+ )
+
+ # dest="command" records which subcommand was chosen; it is checked below.
+ subparsers = parser.add_subparsers(dest="command", help="Commands")
+
+ # Build command
+ build_parser = subparsers.add_parser(
+ "build",
+ help="Build bitcoind at one or more commits",
+ description="Build bitcoind binaries from git commits. "
+ "Each commit can optionally have a name suffix: COMMIT:NAME",
+ )
+ build_parser.add_argument(
+ "commits",
+ nargs="+",
+ metavar="COMMIT[:NAME]",
+ help="Commit(s) to build. Format: COMMIT or COMMIT:NAME (e.g., HEAD:latest, abc123:v27)",
+ )
+ build_parser.add_argument(
+ "-o",
+ "--output-dir",
+ metavar="PATH",
+ help="Where to store binaries (default: ./binaries)",
+ )
+ build_parser.add_argument(
+ "--skip-existing",
+ action="store_true",
+ help="Skip build if binary already exists",
+ )
+ build_parser.set_defaults(func=cmd_build)
+
+ # Run command
+ # Options without an explicit default stay None when omitted; cmd_run passes
+ # them to build_config as CLI overrides.
+ run_parser = subparsers.add_parser(
+ "run",
+ help="Run benchmark on one or more binaries",
+ description="Benchmark bitcoind binaries using hyperfine. "
+ "Each binary must have a name and path: NAME:PATH",
+ )
+ run_parser.add_argument(
+ "binaries",
+ nargs="+",
+ metavar="NAME:PATH",
+ help="Binary(ies) to benchmark. Format: NAME:PATH (e.g., v27:./binaries/v27/bitcoind)",
+ )
+ run_parser.add_argument(
+ "--datadir",
+ required=True,
+ metavar="PATH",
+ help="Source datadir with blockchain snapshot",
+ )
+ run_parser.add_argument(
+ "--tmp-datadir",
+ metavar="PATH",
+ help="Temp datadir for benchmark runs",
+ )
+ run_parser.add_argument(
+ "-o",
+ "--output-dir",
+ metavar="PATH",
+ help="Output directory for results (default: ./bench-output)",
+ )
+ run_parser.add_argument(
+ "--stop-height",
+ type=int,
+ metavar="N",
+ help="Block height to stop at",
+ )
+ run_parser.add_argument(
+ "--dbcache",
+ type=int,
+ metavar="N",
+ help="Database cache size in MB",
+ )
+ run_parser.add_argument(
+ "--runs",
+ type=int,
+ metavar="N",
+ help="Number of benchmark iterations",
+ )
+ run_parser.add_argument(
+ "--connect",
+ metavar="ADDR",
+ help="Connect address for sync",
+ )
+ run_parser.add_argument(
+ "--chain",
+ choices=["main", "testnet", "signet", "regtest"],
+ help="Chain to use",
+ )
+ run_parser.add_argument(
+ "--instrumented",
+ action="store_true",
+ help="Enable profiling (flamegraph + debug logging)",
+ )
+ run_parser.add_argument(
+ "--no-cache-drop",
+ action="store_true",
+ help="Skip cache dropping between runs",
+ )
+ run_parser.set_defaults(func=cmd_run)
+
+ # Analyze command
+ analyze_parser = subparsers.add_parser(
+ "analyze", help="Generate plots from debug.log"
+ )
+ analyze_parser.add_argument("commit", help="Commit hash (for naming)")
+ analyze_parser.add_argument("log_file", help="Path to debug.log")
+ analyze_parser.add_argument(
+ "--output-dir",
+ default="./plots",
+ metavar="PATH",
+ help="Output directory for plots",
+ )
+ analyze_parser.set_defaults(func=cmd_analyze)
+
+ # Compare command
+ compare_parser = subparsers.add_parser(
+ "compare",
+ help="Compare benchmark results from multiple files",
+ description="Load and compare results from one or more results.json files. "
+ "Calculates speedup percentages relative to a baseline.",
+ )
+ compare_parser.add_argument(
+ "results_files",
+ nargs="+",
+ metavar="RESULTS_FILE",
+ help="results.json file(s) to compare",
+ )
+ compare_parser.add_argument(
+ "--baseline",
+ metavar="NAME",
+ help="Name of the baseline entry (default: first entry)",
+ )
+ compare_parser.add_argument(
+ "-o",
+ "--output",
+ metavar="FILE",
+ help="Output file for comparison JSON (default: stdout)",
+ )
+ compare_parser.set_defaults(func=cmd_compare)
+
+ # Report command
+ report_parser = subparsers.add_parser(
+ "report",
+ help="Generate HTML report",
+ description="Generate HTML report from benchmark results. "
+ "Use --network for multi-network CI reports.",
+ )
+ # input_dir is optional (nargs="?") so that multi-network mode can pass only
+ # the output directory; a single positional argument is taken as output_dir.
+ report_parser.add_argument(
+ "input_dir",
+ nargs="?",
+ help="Directory with results.json (for single-network mode)",
+ )
+ report_parser.add_argument("output_dir", help="Output directory for report")
+ report_parser.add_argument(
+ "--title",
+ help="Report title",
+ )
+ # CI multi-network options
+ report_parser.add_argument(
+ "--network",
+ dest="networks",
+ action="append",
+ metavar="NAME:PATH",
+ help="Network results directory (repeatable, e.g., --network mainnet:./mainnet-results)",
+ )
+ report_parser.add_argument(
+ "--pr-number",
+ metavar="N",
+ help="PR number (for CI reports)",
+ )
+ report_parser.add_argument(
+ "--run-id",
+ metavar="ID",
+ help="Run ID (for CI reports)",
+ )
+ report_parser.add_argument(
+ "--update-index",
+ action="store_true",
+ help="Update main index.html (for CI reports)",
+ )
+ report_parser.set_defaults(func=cmd_report)
+
+ args = parser.parse_args()
+
+ # The subcommand is not marked required, so handle its absence here:
+ # print the help text and return a failing exit code.
+ if not args.command:
+ parser.print_help()
+ return 1
+
+ # Every subparser stored its handler in args.func via set_defaults.
+ return args.func(args)
+
+
+# Run the CLI when executed as a script and use main()'s result as exit status.
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/bench.toml b/bench.toml
new file mode 100644
index 000000000000..7bd38faa317b
--- /dev/null
+++ b/bench.toml
@@ -0,0 +1,30 @@
+# Benchcoin configuration
+# Values here override built-in defaults but are overridden by environment
+# variables (BENCH_*) and CLI arguments.
+
+[defaults]
+chain = "main"
+dbcache = 450
+stop_height = 855000
+runs = 3
+# connect = "" # Empty or omit to use public P2P network
+
+[paths]
+binaries_dir = "./binaries"
+output_dir = "./bench-output"
+
+# Profiles override specific defaults
+# Usage: bench.py --profile quick run --datadir /path/to/datadir NAME:PATH...
+
+[profiles.quick]
+stop_height = 2000
+runs = 3
+
+[profiles.full]
+stop_height = 855000
+runs = 3
+
+[profiles.ci]
+stop_height = 845000
+runs = 3
+connect = "148.251.128.115:33333"
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 000000000000..ca0d011303de
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,234 @@
+# Benchcoin
+
+A CLI for benchmarking Bitcoin Core IBD.
+
+## Quick Start
+
+```bash
+# Quick smoke test on signet (requires nix)
+nix develop --command python3 bench.py --profile quick full \
+ --chain signet --datadir /path/to/signet/datadir HEAD~1 HEAD
+
+# Or use just (wraps nix develop)
+just quick HEAD~1 HEAD /path/to/signet/datadir
+```
+
+## Requirements
+
+- **Nix** with flakes enabled (provides hyperfine, flamegraph, etc.)
+- A blockchain datadir snapshot to benchmark against
+- Two git commits to compare
+
+Optional (auto-detected, gracefully degrades without):
+- `/run/wrappers/bin/drop-caches` (NixOS) - clears page cache between runs
+
+## Commands
+
+```
+bench.py [GLOBAL_OPTIONS] COMMAND [OPTIONS] ARGS
+
+Global Options:
+ --profile {quick,full,ci} Configuration profile
+ --config PATH Custom config file
+ -v, --verbose Verbose output
+ --dry-run Show what would run
+
+Commands:
+ build Build bitcoind at one or more commits
+ run Run benchmark (requires pre-built binaries)
+ analyze Generate plots from debug.log
+ compare Compare benchmark results
+ report Generate HTML report
+ full Complete pipeline: build → run → analyze
+```
+
+### build
+
+Build bitcoind binaries at one or more commits (typically two, for comparison). Each commit may carry an optional name suffix, `COMMIT:NAME`:
+
+```bash
+python3 bench.py build HEAD~1 HEAD
+python3 bench.py build --output-dir /tmp/bins abc123 def456
+python3 bench.py build --skip-existing HEAD~1 HEAD # reuse existing
+```
+
+### run
+
+Run hyperfine benchmark comparing two pre-built binaries:
+
+```bash
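+python3 bench.py run --datadir /data/snapshot before:./binaries/before/bitcoind after:./binaries/after/bitcoind
+python3 bench.py run --instrumented --datadir /data/snapshot before:./binaries/before/bitcoind after:./binaries/after/bitcoind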
+```
+
+Options:
+- `--datadir PATH` - Source blockchain snapshot (required)
+- `--tmp-datadir PATH` - Working directory (default: ./bench-output/tmp-datadir)
+- `--stop-height N` - Block height to sync to
+- `--dbcache N` - Database cache in MB
+- `--runs N` - Number of iterations (default: 3, forced to 1 if instrumented)
+- `--instrumented` - Enable flamegraph profiling and debug logging
+- `--connect ADDR` - P2P node to sync from (empty = public network)
+- `--chain {main,signet,testnet,regtest}` - Which chain
+- `--no-cache-drop` - Don't clear page cache between runs
+
+### analyze
+
+Generate plots from a debug.log file:
+
+```bash
+python3 bench.py analyze abc123 /path/to/debug.log --output-dir ./plots
+```
+
+Generates PNG plots for:
+- Block height vs time
+- Cache size vs height/time
+- Transaction count vs height
+- LevelDB compaction events
+- CoinDB write batches
+
+### report
+
+Generate HTML report from benchmark results:
+
+```bash
+python3 bench.py report ./bench-output ./report
+```
+
+### full
+
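+Run complete pipeline (build + run + analyze if instrumented). Note: `bench.py` currently registers only the `build`, `run`, `analyze`, `compare` and `report` subcommands, so until `full` is added the same result is obtained by running `build` followed by `run`: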
+
+```bash
+python3 bench.py --profile quick full --chain signet --datadir /tmp/signet HEAD~1 HEAD
+python3 bench.py --profile full full --datadir /data/mainnet HEAD~1 HEAD
+```
+
+## Profiles
+
+Profiles set sensible defaults for common scenarios:
+
+| Profile | stop_height | runs | dbcache | connect |
+|---------|-------------|------|---------|---------|
+| quick | 2,000 | 3 | 450 | (public network) |
+| full | 855,000 | 3 | 450 | (public network) |
+| ci | 845,000 | 3 | 450 | 148.251.128.115:33333 |
+
+Override any profile setting with CLI flags:
+
+```bash
+python3 bench.py --profile quick full --stop-height 5000 --datadir ... HEAD~1 HEAD
+```
+
+## Configuration
+
+Configuration is layered (lowest to highest priority):
+
+1. Built-in defaults
+2. `bench.toml` (in repo root)
+3. Environment variables (`BENCH_DATADIR`, `BENCH_DBCACHE`, etc.)
+4. CLI arguments
+
+### bench.toml
+
+```toml
+[defaults]
+chain = "main"
+dbcache = 450
+stop_height = 855000
+runs = 3
+
+[paths]
+binaries_dir = "./binaries"
+output_dir = "./bench-output"
+
+[profiles.quick]
+stop_height = 2000
+runs = 3
+
+[profiles.ci]
+stop_height = 845000
+runs = 3
+connect = "148.251.128.115:33333"
+```
+
+### Environment Variables
+
+```bash
+export BENCH_DATADIR=/data/snapshot
+export BENCH_DBCACHE=1000
+export BENCH_STOP_HEIGHT=100000
+```
+
+## Justfile Recipes
+
+The justfile wraps common operations with `nix develop`:
+
+```bash
+just quick HEAD~1 HEAD /path/to/datadir # Quick signet test
+just full HEAD~1 HEAD /path/to/datadir # Full mainnet benchmark
+just instrumented HEAD~1 HEAD /path/to/datadir # With flamegraphs
+just build HEAD~1 HEAD # Build only
+just run HEAD~1 HEAD /path/to/datadir # Run only (binaries must exist)
+```
+
+## Architecture
+
+```
+bench.py CLI entry point (argparse)
+bench/
+├── config.py Layered configuration (TOML + env + CLI)
+├── capabilities.py System capability detection
+├── build.py Build phase (nix build)
+├── benchmark.py Benchmark phase (hyperfine)
+├── analyze.py Plot generation (matplotlib)
+├── report.py HTML report generation
+└── utils.py Git operations, datadir management
+```
+
+### Capability Detection
+
+The tool auto-detects system capabilities and gracefully degrades:
+
+```python
+from bench.capabilities import detect_capabilities
+caps = detect_capabilities()
+# caps.has_hyperfine, caps.can_drop_caches, etc.
+```
+
+Missing optional features emit warnings but don't fail:
+
+```
+WARNING: drop-caches not available - cache won't be cleared between runs
+```
+
+Missing required features (hyperfine, flamegraph for instrumented) cause errors.
+
+### Hyperfine Integration
+
+The benchmark phase generates temporary shell scripts for hyperfine hooks:
+
+- `setup` - Clean tmp datadir (once before all runs)
+- `prepare` - Copy snapshot, drop caches, clean logs (before each run)
+- `cleanup` - Clean tmp datadir (after all runs per command)
+- `conclude` - Collect flamegraph/logs (instrumented only, after each run)
+
+### Instrumented Mode
+
+When `--instrumented` is set:
+
+1. Wraps bitcoind in `flamegraph` for CPU profiling
+2. Enables debug logging: `-debug=coindb -debug=leveldb -debug=bench -debug=validation`
+3. Forces `runs=1` (profiling overhead makes multiple runs pointless)
+4. Generates flamegraph SVGs and performance plots
+
+## CI Integration
+
+GitHub Actions workflows call bench.py directly, wrapping each invocation in `nix develop --command`:
+
+```yaml
+- run: |
+ nix develop --command python3 bench.py build \
+ -o ${{ runner.temp }}/binaries \
+ $BASE_SHA:base $HEAD_SHA:head
+```
+
+CI-specific paths and the dedicated sync node are configured via `--profile ci`.
diff --git a/bench/__init__.py b/bench/__init__.py
new file mode 100644
index 000000000000..cb50424b155c
--- /dev/null
+++ b/bench/__init__.py
@@ -0,0 +1,3 @@
+"""Benchcoin - Bitcoin Core benchmarking toolkit."""
+
+__version__ = "0.1.0"
diff --git a/bench/analyze.py b/bench/analyze.py
new file mode 100644
index 000000000000..baedd97d745c
--- /dev/null
+++ b/bench/analyze.py
@@ -0,0 +1,538 @@
+"""Analyze phase - parse debug.log and generate performance plots.
+
+Refactored from bench-ci/parse_and_plot.py for better structure and reusability.
+"""
+
+from __future__ import annotations
+
+import datetime
+import logging
+import re
+from collections import OrderedDict
+from dataclasses import dataclass
+from pathlib import Path
+
+# matplotlib is optional - gracefully handle if not installed
+try:
+ import matplotlib.pyplot as plt
+
+ HAS_MATPLOTLIB = True
+except ImportError:
+ HAS_MATPLOTLIB = False
+
+logger = logging.getLogger(__name__)
+
+# Bitcoin fork heights for plot annotations
+FORK_HEIGHTS = OrderedDict(
+ [
+ ("BIP34", 227931), # Block v2, coinbase includes height
+ ("BIP66", 363725), # Strict DER signatures
+ ("BIP65", 388381), # OP_CHECKLOCKTIMEVERIFY
+ ("CSV", 419328), # BIP68, 112, 113 - OP_CHECKSEQUENCEVERIFY
+ ("Segwit", 481824), # BIP141, 143, 144, 145 - Segregated Witness
+ ("Taproot", 709632), # BIP341, 342 - Schnorr signatures & Taproot
+ ("Halving 1", 210000), # First halving
+ ("Halving 2", 420000), # Second halving
+ ("Halving 3", 630000), # Third halving
+ ("Halving 4", 840000), # Fourth halving
+ ]
+)
+
+FORK_COLORS = {
+ "BIP34": "blue",
+ "BIP66": "blue",
+ "BIP65": "blue",
+ "CSV": "blue",
+ "Segwit": "green",
+ "Taproot": "red",
+ "Halving 1": "purple",
+ "Halving 2": "purple",
+ "Halving 3": "purple",
+ "Halving 4": "purple",
+}
+
+FORK_STYLES = {
+ "BIP34": "--",
+ "BIP66": "--",
+ "BIP65": "--",
+ "CSV": "--",
+ "Segwit": "--",
+ "Taproot": "--",
+ "Halving 1": ":",
+ "Halving 2": ":",
+ "Halving 3": ":",
+ "Halving 4": ":",
+}
+
+
+@dataclass
+class UpdateTipEntry:
+ """Parsed UpdateTip log entry."""
+
+ timestamp: datetime.datetime
+ height: int
+ tx_count: int
+ cache_size_mb: float
+ cache_coins_count: int
+
+
+@dataclass
+class LevelDBCompactEntry:
+ """Parsed LevelDB compaction log entry."""
+
+ timestamp: datetime.datetime
+
+
+@dataclass
+class LevelDBGenTableEntry:
+ """Parsed LevelDB generated table log entry."""
+
+ timestamp: datetime.datetime
+ keys_count: int
+ bytes_count: int
+
+
+@dataclass
+class ValidationTxAddEntry:
+ """Parsed validation transaction added log entry."""
+
+ timestamp: datetime.datetime
+
+
+@dataclass
+class CoinDBWriteBatchEntry:
+ """Parsed coindb write batch log entry."""
+
+ timestamp: datetime.datetime
+ is_partial: bool
+ size_mb: float
+
+
+@dataclass
+class CoinDBCommitEntry:
+ """Parsed coindb commit log entry."""
+
+ timestamp: datetime.datetime
+ txout_count: int
+
+
+@dataclass
+class ParsedLog:
+ """All parsed data from a debug.log file."""
+
+ update_tip: list[UpdateTipEntry]
+ leveldb_compact: list[LevelDBCompactEntry]
+ leveldb_gen_table: list[LevelDBGenTableEntry]
+ validation_txadd: list[ValidationTxAddEntry]
+ coindb_write_batch: list[CoinDBWriteBatchEntry]
+ coindb_commit: list[CoinDBCommitEntry]
+
+
+@dataclass
+class AnalyzeResult:
+ """Result of the analyze phase."""
+
+ commit: str
+ output_dir: Path
+ plots: list[Path]
+
+
+class LogParser:
+ """Parse bitcoind debug.log files."""
+
+ # Regex patterns
+ UPDATETIP_RE = re.compile(
+ r"^([\d\-:TZ]+) UpdateTip: new best.+height=(\d+).+tx=(\d+).+cache=([\d.]+)MiB\((\d+)txo\)"
+ )
+ LEVELDB_COMPACT_RE = re.compile(r"^([\d\-:TZ]+) \[leveldb] Compacting.*files")
+ LEVELDB_GEN_TABLE_RE = re.compile(
+ r"^([\d\-:TZ]+) \[leveldb] Generated table.*: (\d+) keys, (\d+) bytes"
+ )
+ VALIDATION_TXADD_RE = re.compile(
+ r"^([\d\-:TZ]+) \[validation] TransactionAddedToMempool: txid=.+wtxid=.+"
+ )
+ COINDB_WRITE_BATCH_RE = re.compile(
+ r"^([\d\-:TZ]+) \[coindb] Writing (partial|final) batch of ([\d.]+) MiB"
+ )
+ COINDB_COMMIT_RE = re.compile(
+ r"^([\d\-:TZ]+) \[coindb] Committed (\d+) changed transaction outputs"
+ )
+
+ @staticmethod
+ def parse_timestamp(iso_str: str) -> datetime.datetime:
+ """Parse ISO 8601 timestamp from log."""
+ return datetime.datetime.strptime(iso_str, "%Y-%m-%dT%H:%M:%SZ")
+
+ def parse_file(self, log_file: Path) -> ParsedLog:
+ """Parse a debug.log file and extract all relevant data."""
+ update_tip: list[UpdateTipEntry] = []
+ leveldb_compact: list[LevelDBCompactEntry] = []
+ leveldb_gen_table: list[LevelDBGenTableEntry] = []
+ validation_txadd: list[ValidationTxAddEntry] = []
+ coindb_write_batch: list[CoinDBWriteBatchEntry] = []
+ coindb_commit: list[CoinDBCommitEntry] = []
+
+ with open(log_file, "r", encoding="utf-8") as f:
+ for line in f:
+ if match := self.UPDATETIP_RE.match(line):
+ iso_str, height, tx, cache_mb, coins = match.groups()
+ update_tip.append(
+ UpdateTipEntry(
+ timestamp=self.parse_timestamp(iso_str),
+ height=int(height),
+ tx_count=int(tx),
+ cache_size_mb=float(cache_mb),
+ cache_coins_count=int(coins),
+ )
+ )
+ elif match := self.LEVELDB_COMPACT_RE.match(line):
+ leveldb_compact.append(
+ LevelDBCompactEntry(
+ timestamp=self.parse_timestamp(match.group(1))
+ )
+ )
+ elif match := self.LEVELDB_GEN_TABLE_RE.match(line):
+ iso_str, keys, bytes_count = match.groups()
+ leveldb_gen_table.append(
+ LevelDBGenTableEntry(
+ timestamp=self.parse_timestamp(iso_str),
+ keys_count=int(keys),
+ bytes_count=int(bytes_count),
+ )
+ )
+ elif match := self.VALIDATION_TXADD_RE.match(line):
+ validation_txadd.append(
+ ValidationTxAddEntry(
+ timestamp=self.parse_timestamp(match.group(1))
+ )
+ )
+ elif match := self.COINDB_WRITE_BATCH_RE.match(line):
+ iso_str, batch_type, size_mb = match.groups()
+ coindb_write_batch.append(
+ CoinDBWriteBatchEntry(
+ timestamp=self.parse_timestamp(iso_str),
+ is_partial=(batch_type == "partial"),
+ size_mb=float(size_mb),
+ )
+ )
+ elif match := self.COINDB_COMMIT_RE.match(line):
+ iso_str, txout_count = match.groups()
+ coindb_commit.append(
+ CoinDBCommitEntry(
+ timestamp=self.parse_timestamp(iso_str),
+ txout_count=int(txout_count),
+ )
+ )
+
+ return ParsedLog(
+ update_tip=update_tip,
+ leveldb_compact=leveldb_compact,
+ leveldb_gen_table=leveldb_gen_table,
+ validation_txadd=validation_txadd,
+ coindb_write_batch=coindb_write_batch,
+ coindb_commit=coindb_commit,
+ )
+
+
+class PlotGenerator:
+ """Generate performance plots from parsed log data."""
+
+ def __init__(self, commit: str, output_dir: Path):
+ self.commit = commit
+ self.output_dir = output_dir
+ self.generated_plots: list[Path] = []
+
+ if not HAS_MATPLOTLIB:
+ raise RuntimeError(
+ "matplotlib is required for plot generation. "
+ "Install with: pip install matplotlib"
+ )
+
+ def generate_all(self, data: ParsedLog) -> list[Path]:
+ """Generate all plots from parsed data."""
+ if not data.update_tip:
+ logger.warning("No UpdateTip entries found, skipping plot generation")
+ return []
+
+ # Verify entries are sorted by time
+ for i in range(len(data.update_tip) - 1):
+ if data.update_tip[i].timestamp > data.update_tip[i + 1].timestamp:
+ logger.warning("UpdateTip entries are not sorted by time")
+ break
+
+ # Extract base time for elapsed calculations
+ base_time = data.update_tip[0].timestamp
+
+ # Extract data series
+ times = [e.timestamp for e in data.update_tip]
+ heights = [e.height for e in data.update_tip]
+ tx_counts = [e.tx_count for e in data.update_tip]
+ cache_sizes = [e.cache_size_mb for e in data.update_tip]
+ cache_counts = [e.cache_coins_count for e in data.update_tip]
+ elapsed_minutes = [(t - base_time).total_seconds() / 60 for t in times]
+
+ # Generate core plots
+ self._plot(
+ elapsed_minutes,
+ heights,
+ "Elapsed minutes",
+ "Block Height",
+ "Block Height vs Time",
+ f"{self.commit}-height_vs_time.png",
+ )
+
+ self._plot(
+ heights,
+ cache_sizes,
+ "Block Height",
+ "Cache Size (MiB)",
+ "Cache Size vs Block Height",
+ f"{self.commit}-cache_vs_height.png",
+ is_height_based=True,
+ )
+
+ self._plot(
+ elapsed_minutes,
+ cache_sizes,
+ "Elapsed minutes",
+ "Cache Size (MiB)",
+ "Cache Size vs Time",
+ f"{self.commit}-cache_vs_time.png",
+ )
+
+ self._plot(
+ heights,
+ tx_counts,
+ "Block Height",
+ "Transaction Count",
+ "Transactions vs Block Height",
+ f"{self.commit}-tx_vs_height.png",
+ is_height_based=True,
+ )
+
+ self._plot(
+ heights,
+ cache_counts,
+ "Block Height",
+ "Coins Cache Size",
+ "Coins Cache Size vs Height",
+ f"{self.commit}-coins_cache_vs_height.png",
+ is_height_based=True,
+ )
+
+ # LevelDB plots
+ if data.leveldb_compact:
+ compact_minutes = [
+ (e.timestamp - base_time).total_seconds() / 60
+ for e in data.leveldb_compact
+ ]
+ self._plot(
+ compact_minutes,
+ [1] * len(compact_minutes),
+ "Elapsed minutes",
+ "LevelDB Compaction",
+ "LevelDB Compaction Events vs Time",
+ f"{self.commit}-leveldb_compact_vs_time.png",
+ )
+
+ if data.leveldb_gen_table:
+ gen_minutes = [
+ (e.timestamp - base_time).total_seconds() / 60
+ for e in data.leveldb_gen_table
+ ]
+ gen_keys = [e.keys_count for e in data.leveldb_gen_table]
+ gen_bytes = [e.bytes_count for e in data.leveldb_gen_table]
+
+ self._plot(
+ gen_minutes,
+ gen_keys,
+ "Elapsed minutes",
+ "Number of keys",
+ "LevelDB Keys Generated vs Time",
+ f"{self.commit}-leveldb_gen_keys_vs_time.png",
+ )
+
+ self._plot(
+ gen_minutes,
+ gen_bytes,
+ "Elapsed minutes",
+ "Number of bytes",
+ "LevelDB Bytes Generated vs Time",
+ f"{self.commit}-leveldb_gen_bytes_vs_time.png",
+ )
+
+ # Validation plots
+ if data.validation_txadd:
+ txadd_minutes = [
+ (e.timestamp - base_time).total_seconds() / 60
+ for e in data.validation_txadd
+ ]
+ self._plot(
+ txadd_minutes,
+ [1] * len(txadd_minutes),
+ "Elapsed minutes",
+ "Transaction Additions",
+ "Transaction Additions to Mempool vs Time",
+ f"{self.commit}-validation_txadd_vs_time.png",
+ )
+
+ # CoinDB plots
+ if data.coindb_write_batch:
+ batch_minutes = [
+ (e.timestamp - base_time).total_seconds() / 60
+ for e in data.coindb_write_batch
+ ]
+ batch_sizes = [e.size_mb for e in data.coindb_write_batch]
+ self._plot(
+ batch_minutes,
+ batch_sizes,
+ "Elapsed minutes",
+ "Batch Size MiB",
+ "Coin Database Partial/Final Write Batch Size vs Time",
+ f"{self.commit}-coindb_write_batch_size_vs_time.png",
+ )
+
+ if data.coindb_commit:
+ commit_minutes = [
+ (e.timestamp - base_time).total_seconds() / 60
+ for e in data.coindb_commit
+ ]
+ commit_txouts = [e.txout_count for e in data.coindb_commit]
+ self._plot(
+ commit_minutes,
+ commit_txouts,
+ "Elapsed minutes",
+ "Transaction Output Count",
+ "Coin Database Transaction Output Committed vs Time",
+ f"{self.commit}-coindb_commit_txout_vs_time.png",
+ )
+
+ return self.generated_plots
+
+ def _plot(
+ self,
+ x: list,
+ y: list,
+ x_label: str,
+ y_label: str,
+ title: str,
+ filename: str,
+ is_height_based: bool = False,
+ ) -> None:
+ """Generate a single plot."""
+ if not x or not y:
+ logger.debug(f"Skipping plot '{title}' - no data")
+ return
+
+ plt.figure(figsize=(30, 10))
+ plt.plot(x, y)
+ plt.title(title, fontsize=20)
+ plt.xlabel(x_label, fontsize=16)
+ plt.ylabel(y_label, fontsize=16)
+ plt.grid(True)
+
+ min_x, max_x = min(x), max(x)
+ if min_x < max_x:
+ plt.xlim(min_x, max_x)
+
+ # Add fork markers for height-based plots
+ if is_height_based:
+ self._add_fork_markers(min_x, max_x, max(y))
+
+ plt.xticks(rotation=90, fontsize=12)
+ plt.yticks(fontsize=12)
+ plt.tight_layout()
+
+ output_path = self.output_dir / filename
+ plt.savefig(output_path)
+ plt.close()
+
+ self.generated_plots.append(output_path)
+ logger.info(f"Saved plot: {output_path}")
+
+ def _add_fork_markers(self, min_x: float, max_x: float, max_y: float) -> None:
+ """Add vertical lines for Bitcoin forks."""
+ text_positions = {}
+ position_increment = max_y * 0.05
+ current_position = max_y * 0.9
+
+ for fork_name, height in FORK_HEIGHTS.items():
+ if min_x <= height <= max_x:
+ plt.axvline(
+ x=height,
+ color=FORK_COLORS[fork_name],
+ linestyle=FORK_STYLES[fork_name],
+ )
+
+ if height in text_positions:
+ text_positions[height] -= position_increment
+ else:
+ text_positions[height] = current_position
+ current_position -= position_increment
+ if current_position < max_y * 0.1:
+ current_position = max_y * 0.9
+
+ plt.text(
+ height,
+ text_positions[height],
+ f"{fork_name} ({height})",
+ rotation=90,
+ verticalalignment="top",
+ color=FORK_COLORS[fork_name],
+ )
+
+
+class AnalyzePhase:
+ """Analyze benchmark results and generate plots."""
+
+ def run(
+ self,
+ commit: str,
+ log_file: Path,
+ output_dir: Path,
+ ) -> AnalyzeResult:
+ """Analyze a debug.log and generate plots.
+
+ Args:
+ commit: Commit hash (for naming)
+ log_file: Path to debug.log
+ output_dir: Where to save plots
+
+ Returns:
+ AnalyzeResult with paths to generated plots
+ """
+ if not HAS_MATPLOTLIB:
+ raise RuntimeError(
+ "matplotlib is required for plot generation. "
+ "Install with: pip install matplotlib"
+ )
+
+ if not log_file.exists():
+ raise FileNotFoundError(f"Log file not found: {log_file}")
+
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ logger.info(f"Parsing log file: {log_file}")
+ parser = LogParser()
+ data = parser.parse_file(log_file)
+
+ # Log parsed data summary
+ logger.info(f" UpdateTip entries: {len(data.update_tip)}")
+ logger.info(f" LevelDB compact entries: {len(data.leveldb_compact)}")
+ logger.info(f" LevelDB gen table entries: {len(data.leveldb_gen_table)}")
+ logger.info(f" Validation txadd entries: {len(data.validation_txadd)}")
+ logger.info(f" CoinDB write batch entries: {len(data.coindb_write_batch)}")
+ logger.info(f" CoinDB commit entries: {len(data.coindb_commit)}")
+
+ logger.info(f"Generating plots for {commit[:12]}")
+ logger.info(f" Output directory: {output_dir}")
+ generator = PlotGenerator(commit[:12], output_dir)
+ plots = generator.generate_all(data)
+
+ logger.info(f"Generated {len(plots)} plots")
+
+ return AnalyzeResult(
+ commit=commit,
+ output_dir=output_dir,
+ plots=plots,
+ )
diff --git a/bench/benchmark.py b/bench/benchmark.py
new file mode 100644
index 000000000000..788e4e53e94d
--- /dev/null
+++ b/bench/benchmark.py
@@ -0,0 +1,349 @@
+"""Benchmark phase - run hyperfine benchmarks on bitcoind binaries."""
+
+from __future__ import annotations
+
+import logging
+import os
+import shlex
+import shutil
+import subprocess
+import tempfile
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from .patchelf import ensure_binary_runnable
+
+if TYPE_CHECKING:
+ from .capabilities import Capabilities
+ from .config import Config
+
+
+logger = logging.getLogger(__name__)
+
+# Debug flags for instrumented mode
+INSTRUMENTED_DEBUG_FLAGS = ["coindb", "leveldb", "bench", "validation"]
+
+
+@dataclass
+class BinaryResult:
+ """Result for a single binary."""
+
+ name: str
+ flamegraph: Path | None = None
+ debug_log: Path | None = None
+
+
+@dataclass
+class BenchmarkResult:
+ """Result of the benchmark phase."""
+
+ results_file: Path
+ instrumented: bool
+ binaries: list[BinaryResult] = field(default_factory=list)
+
+
+def parse_binary_spec(spec: str) -> tuple[str, Path]:
+ """Parse a binary spec like 'name:/path/to/binary'.
+
+ Returns (name, path).
+ """
+ if ":" not in spec:
+ raise ValueError(f"Invalid binary spec '{spec}': must be NAME:PATH")
+ name, path_str = spec.split(":", 1)
+ if not name:
+ raise ValueError(f"Invalid binary spec '{spec}': name cannot be empty")
+ return name, Path(path_str)
+
+
+class BenchmarkPhase:
+ """Run hyperfine benchmarks on bitcoind binaries."""
+
+ def __init__(
+ self,
+ config: Config,
+ capabilities: Capabilities,
+ ):
+ self.config = config
+ self.capabilities = capabilities
+ self._temp_scripts: list[Path] = []
+
+ def run(
+ self,
+ binaries: list[tuple[str, Path]],
+ datadir: Path,
+ output_dir: Path,
+ ) -> BenchmarkResult:
+ """Run benchmarks on given binaries.
+
+ Args:
+ binaries: List of (name, binary_path) tuples
+ datadir: Source datadir with blockchain snapshot
+ output_dir: Where to store results
+
+ Returns:
+ BenchmarkResult with paths to outputs
+ """
+ if not binaries:
+ raise ValueError("At least one binary is required")
+
+ # Validate all binaries exist
+ for name, path in binaries:
+ if not path.exists():
+ raise FileNotFoundError(f"Binary not found: {path} ({name})")
+
+ # Ensure binaries can run on this system (patches guix binaries on NixOS)
+ for name, path in binaries:
+ if not ensure_binary_runnable(path):
+ raise RuntimeError(f"Binary {name} at {path} cannot be made runnable")
+
+ # Check prerequisites
+ errors = self.capabilities.check_for_run(self.config.instrumented)
+ if errors:
+ raise RuntimeError("Benchmark prerequisites not met:\n" + "\n".join(errors))
+
+ # Log warnings about missing optional capabilities
+ for warning in self.capabilities.get_warnings():
+ logger.warning(warning)
+
+ # Setup directories
+ output_dir.mkdir(parents=True, exist_ok=True)
+ tmp_datadir = Path(self.config.tmp_datadir)
+ tmp_datadir.mkdir(parents=True, exist_ok=True)
+
+ results_file = output_dir / "results.json"
+
+ logger.info("Starting benchmark")
+ logger.info(f" Output dir: {output_dir}")
+ logger.info(f" Temp datadir: {tmp_datadir}")
+ logger.info(f" Source datadir: {datadir}")
+ logger.info(f" Binaries: {len(binaries)}")
+ for name, path in binaries:
+ logger.info(f" {name}: {path}")
+ logger.info(f" Instrumented: {self.config.instrumented}")
+ logger.info(f" Runs: {self.config.runs}")
+ logger.info(f" Stop height: {self.config.stop_height}")
+ logger.info(f" dbcache: {self.config.dbcache}")
+
+ try:
+ # Create hook scripts for hyperfine
+ setup_script = self._create_setup_script(tmp_datadir)
+ prepare_script = self._create_prepare_script(tmp_datadir, datadir)
+ cleanup_script = self._create_cleanup_script(tmp_datadir)
+
+ # Build hyperfine command
+ cmd = self._build_hyperfine_cmd(
+ binaries=binaries,
+ tmp_datadir=tmp_datadir,
+ results_file=results_file,
+ setup_script=setup_script,
+ prepare_script=prepare_script,
+ cleanup_script=cleanup_script,
+ output_dir=output_dir,
+ )
+
+ # Log the commands being benchmarked
+ logger.info("Commands to benchmark:")
+ for name, path in binaries:
+ bitcoind_cmd = self._build_bitcoind_cmd(path, tmp_datadir)
+ logger.info(f" {name}: {bitcoind_cmd}")
+
+ if self.config.dry_run:
+ logger.info(f"[DRY RUN] Would run: {' '.join(cmd)}")
+ return BenchmarkResult(
+ results_file=results_file,
+ instrumented=self.config.instrumented,
+ )
+
+ # Log the full hyperfine command
+ logger.info("Running hyperfine...")
+ logger.info(f" Command: {' '.join(cmd[:7])} ...") # First few args
+ logger.debug(f" Full command: {' '.join(cmd)}")
+ subprocess.run(cmd, check=True)
+
+ # Collect results
+ benchmark_result = BenchmarkResult(
+ results_file=results_file,
+ instrumented=self.config.instrumented,
+ )
+
+ # For instrumented runs, collect flamegraphs and debug logs
+ if self.config.instrumented:
+ logger.info("Collecting instrumented artifacts...")
+ for name, _path in binaries:
+ binary_result = BinaryResult(name=name)
+
+ flamegraph_file = output_dir / f"{name}-flamegraph.svg"
+ debug_log_file = output_dir / f"{name}-debug.log"
+
+ if flamegraph_file.exists():
+ binary_result.flamegraph = flamegraph_file
+ logger.info(f" Flamegraph ({name}): {flamegraph_file}")
+ if debug_log_file.exists():
+ binary_result.debug_log = debug_log_file
+ logger.info(f" Debug log ({name}): {debug_log_file}")
+
+ benchmark_result.binaries.append(binary_result)
+
+ # Clean up tmp_datadir
+ if tmp_datadir.exists():
+ logger.debug(f"Cleaning up tmp_datadir: {tmp_datadir}")
+ shutil.rmtree(tmp_datadir)
+
+ return benchmark_result
+
+ finally:
+ # Clean up temp scripts
+ for script in self._temp_scripts:
+ if script.exists():
+ script.unlink()
+ self._temp_scripts.clear()
+
+ def _create_temp_script(self, commands: list[str], name: str) -> Path:
+ """Create a temporary shell script."""
+ content = "#!/usr/bin/env bash\nset -euxo pipefail\n"
+ content += "\n".join(commands) + "\n"
+
+ fd, path = tempfile.mkstemp(suffix=".sh", prefix=f"bench_{name}_")
+ os.write(fd, content.encode())
+ os.close(fd)
+ os.chmod(path, 0o755)
+
+ script_path = Path(path)
+ self._temp_scripts.append(script_path)
+ logger.debug(f"Created {name} script: {script_path}")
+ for cmd in commands:
+ logger.debug(f" {cmd}")
+ return script_path
+
+ def _create_setup_script(self, tmp_datadir: Path) -> Path:
+ """Create setup script (runs once before all timing runs)."""
+ commands = [
+ f'mkdir -p "{tmp_datadir}"',
+ f'rm -rf "{tmp_datadir}"/*',
+ ]
+ return self._create_temp_script(commands, "setup")
+
+ def _create_prepare_script(self, tmp_datadir: Path, original_datadir: Path) -> Path:
+ """Create prepare script (runs before each timing run)."""
+ commands = [
+ f'rm -rf "{tmp_datadir}"/*',
+ ]
+
+ # Copy datadir
+ commands.append(f'cp -r "{original_datadir}"/* "{tmp_datadir}"')
+
+ # Drop caches if available
+ if self.capabilities.can_drop_caches and not self.config.no_cache_drop:
+ commands.append(self.capabilities.drop_caches_path)
+
+ # Clean debug logs
+ commands.append(
+ f'find "{tmp_datadir}" -name debug.log -delete 2>/dev/null || true'
+ )
+
+ return self._create_temp_script(commands, "prepare")
+
+ def _create_cleanup_script(self, tmp_datadir: Path) -> Path:
+ """Create cleanup script (runs after all timing runs for each command)."""
+ commands = [
+ f'rm -rf "{tmp_datadir}"/*',
+ ]
+ return self._create_temp_script(commands, "cleanup")
+
+ def _build_bitcoind_cmd(
+ self,
+ binary: Path,
+ tmp_datadir: Path,
+ ) -> str:
+ """Build the bitcoind command string for hyperfine."""
+ parts = []
+
+ # Add flamegraph wrapper for instrumented mode
+ if self.config.instrumented:
+ parts.append("flamegraph")
+ parts.append("--palette bitcoin")
+ parts.append("--title 'bitcoind IBD'")
+ parts.append("-c 'record -F 101 --call-graph fp'")
+ parts.append("--")
+
+ # Bitcoind command
+ parts.append(str(binary))
+ parts.append(f"-datadir={tmp_datadir}")
+ parts.append(f"-dbcache={self.config.dbcache}")
+ parts.append(f"-stopatheight={self.config.stop_height}")
+ parts.append("-prune=10000")
+ parts.append(f"-chain={self.config.chain}")
+ parts.append("-daemon=0")
+ parts.append("-printtoconsole=0")
+
+ if self.config.connect:
+ parts.append(f"-connect={self.config.connect}")
+
+ # Debug flags for instrumented mode
+ if self.config.instrumented:
+ for flag in INSTRUMENTED_DEBUG_FLAGS:
+ parts.append(f"-debug={flag}")
+
+ return " ".join(parts)
+
+ def _build_hyperfine_cmd(
+ self,
+ binaries: list[tuple[str, Path]],
+ tmp_datadir: Path,
+ results_file: Path,
+ setup_script: Path,
+ prepare_script: Path,
+ cleanup_script: Path,
+ output_dir: Path,
+ ) -> list[str]:
+ """Build the hyperfine command."""
+ cmd = [
+ "hyperfine",
+ "--shell=bash",
+ f"--setup={setup_script}",
+ f"--prepare={prepare_script}",
+ f"--cleanup={cleanup_script}",
+ f"--runs={self.config.runs}",
+ f"--export-json={results_file}",
+ "--show-output",
+ ]
+
+ # Add command names and build commands
+ for name, binary_path in binaries:
+ cmd.append(f"--command-name={name}")
+
+ # Build the actual commands to benchmark
+ for name, binary_path in binaries:
+ bitcoind_cmd = self._build_bitcoind_cmd(binary_path, tmp_datadir)
+
+ # For instrumented runs, append the conclude logic to each command
+ if self.config.instrumented:
+ conclude = self._create_conclude_commands(name, tmp_datadir, output_dir)
+ bitcoind_cmd += f" && {conclude}"
+
+ cmd.append(bitcoind_cmd)
+
+ return cmd
+
+ def _create_conclude_commands(
+ self,
+ name: str,
+ tmp_datadir: Path,
+ output_dir: Path,
+ ) -> str:
+ """Create inline conclude commands for a specific binary."""
+ # Return shell commands to run after each benchmark
+ commands = []
+
+ # Move flamegraph if exists
+ commands.append(
+ f'if [ -e flamegraph.svg ]; then mv flamegraph.svg "{output_dir}/{name}-flamegraph.svg"; fi'
+ )
+
+ # Copy debug log if exists
+ commands.append(
+ f'debug_log=$(find "{tmp_datadir}" -name debug.log -print -quit); '
+ f'if [ -n "$debug_log" ]; then cp "$debug_log" "{output_dir}/{name}-debug.log"; fi'
+ )
+
+ return " && ".join(commands)
diff --git a/bench/build.py b/bench/build.py
new file mode 100644
index 000000000000..6187263a73de
--- /dev/null
+++ b/bench/build.py
@@ -0,0 +1,197 @@
+"""Build phase - compile bitcoind at specified commits."""
+
+from __future__ import annotations
+
+import logging
+import shutil
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from .capabilities import Capabilities
+ from .config import Config
+
+from .utils import GitState, git_checkout, git_rev_parse
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BuiltBinary:
+ """A single built binary."""
+
+ name: str
+ path: Path
+ commit: str
+
+
+@dataclass
+class BuildResult:
+ """Result of the build phase."""
+
+ binaries: list[BuiltBinary]
+
+
+def parse_commit_spec(spec: str) -> tuple[str, str | None]:
+ """Parse a commit spec like 'abc123:name' or 'abc123'.
+
+ Returns (commit, name) where name may be None.
+ """
+ if ":" in spec:
+ commit, name = spec.split(":", 1)
+ return commit, name
+ return spec, None
+
+
+class BuildPhase:
+ """Build bitcoind binaries at specified commits."""
+
+ def __init__(
+ self,
+ config: Config,
+ capabilities: Capabilities,
+ repo_path: Path | None = None,
+ ):
+ self.config = config
+ self.capabilities = capabilities
+ self.repo_path = repo_path or Path.cwd()
+
+ def run(
+ self,
+ commit_specs: list[str],
+ output_dir: Path | None = None,
+ ) -> BuildResult:
+ """Build bitcoind at given commits.
+
+ Args:
+ commit_specs: List of commit specs like 'abc123:name' or 'abc123'
+ output_dir: Where to store binaries (default: ./binaries)
+
+ Returns:
+ BuildResult with list of built binaries
+ """
+ # Check prerequisites
+ errors = self.capabilities.check_for_build()
+ if errors:
+ raise RuntimeError("Build prerequisites not met:\n" + "\n".join(errors))
+
+ output_dir = output_dir or Path(self.config.binaries_dir)
+
+ # Parse commit specs and resolve to full hashes
+ commits: list[tuple[str, str, str]] = [] # (commit_hash, name, original_spec)
+ for spec in commit_specs:
+ commit, name = parse_commit_spec(spec)
+ commit_hash = git_rev_parse(commit, self.repo_path)
+ # Default name to short hash if not provided
+ if name is None:
+ name = commit_hash[:12]
+ commits.append((commit_hash, name, spec))
+
+ logger.info(f"Building {len(commits)} binary(ies):")
+ for commit_hash, name, spec in commits:
+ logger.info(f" {name}: {commit_hash[:12]} ({spec})")
+ logger.info(f" Repo: {self.repo_path}")
+ logger.info(f" Output: {output_dir}")
+
+ # Check if we can skip existing builds
+ binaries_to_build: list[
+ tuple[str, str, Path]
+ ] = [] # (commit_hash, name, output_path)
+ for commit_hash, name, _spec in commits:
+ binary_dir = output_dir / name
+ binary_dir.mkdir(parents=True, exist_ok=True)
+ binary_path = binary_dir / "bitcoind"
+
+ if self.config.skip_existing and binary_path.exists():
+ logger.info(f" Skipping {name} - binary exists")
+ else:
+ binaries_to_build.append((commit_hash, name, binary_path))
+
+ if not binaries_to_build:
+ logger.info("All binaries exist and --skip-existing set, skipping build")
+ return BuildResult(
+ binaries=[
+ BuiltBinary(
+ name=name,
+ path=output_dir / name / "bitcoind",
+ commit=commit_hash,
+ )
+ for commit_hash, name, _spec in commits
+ ]
+ )
+
+ # Save git state for restoration
+ git_state = GitState(self.repo_path)
+ git_state.save()
+
+ built_binaries: list[BuiltBinary] = []
+
+ try:
+ for commit_hash, name, output_path in binaries_to_build:
+ self._build_commit(name, commit_hash, output_path)
+ built_binaries.append(
+ BuiltBinary(name=name, path=output_path, commit=commit_hash)
+ )
+
+ finally:
+ # Always restore git state
+ git_state.restore()
+
+ # Include skipped binaries in result
+ all_binaries = []
+ for commit_hash, name, _spec in commits:
+ binary_path = output_dir / name / "bitcoind"
+ all_binaries.append(
+ BuiltBinary(name=name, path=binary_path, commit=commit_hash)
+ )
+
+ return BuildResult(binaries=all_binaries)
+
+ def _build_commit(self, name: str, commit: str, output_path: Path) -> None:
+ """Build bitcoind for a single commit."""
+ logger.info(f"Building {name} ({commit[:12]})")
+
+ if self.config.dry_run:
+ logger.info(f" [DRY RUN] Would build {commit[:12]} -> {output_path}")
+ return
+
+ # Checkout the commit
+ logger.info(f" Checking out {commit[:12]}...")
+ git_checkout(commit, self.repo_path)
+
+ # Build with nix
+ cmd = ["nix", "build", "-L"]
+
+ logger.info(f" Running: {' '.join(cmd)}")
+ logger.info(f" Working directory: {self.repo_path}")
+ result = subprocess.run(
+ cmd,
+ cwd=self.repo_path,
+ )
+
+ if result.returncode != 0:
+ raise RuntimeError(f"Build failed for {name} ({commit[:12]})")
+
+ # Copy binary to output location
+ nix_binary = self.repo_path / "result" / "bin" / "bitcoind"
+ if not nix_binary.exists():
+ raise RuntimeError(f"Built binary not found at {nix_binary}")
+
+ logger.info(f" Copying {nix_binary} -> {output_path}")
+
+ # Remove existing binary if present (may be read-only from nix)
+ if output_path.exists():
+ output_path.chmod(0o755)
+ output_path.unlink()
+
+ shutil.copy2(nix_binary, output_path)
+ output_path.chmod(0o755) # Ensure it's executable and writable
+ logger.info(f" Built {name} binary: {output_path}")
+
+ # Clean up nix result symlink
+ result_link = self.repo_path / "result"
+ if result_link.is_symlink():
+ logger.debug(f" Removing nix result symlink: {result_link}")
+ result_link.unlink()
diff --git a/bench/capabilities.py b/bench/capabilities.py
new file mode 100644
index 000000000000..31b6bd59f05f
--- /dev/null
+++ b/bench/capabilities.py
@@ -0,0 +1,117 @@
+"""System capability detection for graceful degradation.
+
+Detects available tools and features, allowing the benchmark to run
+on systems without all capabilities (with appropriate warnings).
+"""
+
+from __future__ import annotations
+
+import os
+import shutil
+from dataclasses import dataclass
+from pathlib import Path
+
+
+# Known paths for drop-caches on NixOS
+DROP_CACHES_PATHS = [
+ "/run/wrappers/bin/drop-caches",
+ "/usr/local/bin/drop-caches",
+]
+
+
+@dataclass
+class Capabilities:
+ """Detected system capabilities."""
+
+ # Cache management
+ can_drop_caches: bool
+ drop_caches_path: str | None
+
+ # Required tools
+ has_hyperfine: bool
+ has_flamegraph: bool
+ has_perf: bool
+ has_nix: bool
+
+ # System info
+ cpu_count: int
+ is_nixos: bool
+ is_ci: bool
+
+ def check_for_run(self, instrumented: bool = False) -> list[str]:
+ """Check if we have required capabilities for a benchmark run.
+
+ Returns list of errors (empty if all good).
+ """
+ errors = []
+
+ if not self.has_hyperfine:
+ errors.append("hyperfine not found in PATH (required for benchmarking)")
+
+ if instrumented:
+ if not self.has_flamegraph:
+ errors.append(
+ "flamegraph not found in PATH (required for --instrumented)"
+ )
+ if not self.has_perf:
+ errors.append("perf not found in PATH (required for --instrumented)")
+
+ return errors
+
+ def check_for_build(self) -> list[str]:
+ """Check if we have required capabilities for building.
+
+ Returns list of errors (empty if all good).
+ """
+ errors = []
+
+ if not self.has_nix:
+ errors.append("nix not found in PATH (required for building)")
+
+ return errors
+
+ def get_warnings(self) -> list[str]:
+ """Get warnings about missing optional capabilities."""
+ warnings = []
+
+ if not self.can_drop_caches:
+ warnings.append(
+ "drop-caches not available - cache won't be cleared between runs"
+ )
+
+ return warnings
+
+
+def _check_executable(name: str) -> bool:
+ """Check if an executable is available in PATH."""
+ return shutil.which(name) is not None
+
+
+def _find_drop_caches() -> str | None:
+ """Find drop-caches executable."""
+ for path in DROP_CACHES_PATHS:
+ if Path(path).exists() and os.access(path, os.X_OK):
+ return path
+ return None
+
+
+def _is_nixos() -> bool:
+ """Check if we're running on NixOS."""
+ return Path("/etc/NIXOS").exists()
+
+
+def detect_capabilities() -> Capabilities:
+ """Auto-detect system capabilities."""
+ drop_caches_path = _find_drop_caches()
+
+ return Capabilities(
+ can_drop_caches=drop_caches_path is not None,
+ drop_caches_path=drop_caches_path,
+ has_hyperfine=_check_executable("hyperfine"),
+ has_flamegraph=_check_executable("flamegraph"),
+ has_perf=_check_executable("perf"),
+ has_nix=_check_executable("nix"),
+ cpu_count=os.cpu_count() or 1,
+ is_nixos=_is_nixos(),
+ is_ci=os.environ.get("CI", "").lower() in ("true", "1", "yes"),
+ )
diff --git a/bench/compare.py b/bench/compare.py
new file mode 100644
index 000000000000..fac328841634
--- /dev/null
+++ b/bench/compare.py
@@ -0,0 +1,180 @@
+"""Compare phase - compare benchmark results from multiple runs."""
+
+from __future__ import annotations
+
+import json
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class BenchmarkEntry:
+    """A single benchmark entry from results.json.
+
+    Each field mirrors the key of the same name that
+    ComparePhase._parse_results reads from an item of the "results" list.
+    """
+
+    command: str  # entry name; also the value matched against the baseline name
+    mean: float  # mean run time; logged with an "s" suffix by ComparePhase.run
+    stddev: float | None  # None when the results item has no "stddev" key
+    user: float
+    system: float
+    min: float
+    max: float
+    times: list[float]  # presumably the individual run times - confirm against hyperfine
+
+
+@dataclass
+class Comparison:
+    """Comparison of one entry against the baseline."""
+
+    name: str  # command name of the compared entry
+    mean: float  # mean of the compared entry
+    baseline_mean: float  # mean of the baseline entry it was compared with
+    speedup_percent: float  # positive = faster than baseline, negative = slower
+    stddev: float | None  # stddev of the compared entry, if the file had one
+
+
+@dataclass
+class CompareResult:
+    """Result of comparison."""
+
+    baseline: str  # command name that was used as the baseline
+    comparisons: list[Comparison]  # one item per entry whose name differs from the baseline
+
+
+class ComparePhase:
+    """Load results.json files and compare every entry against a baseline."""
+
+    def run(
+        self,
+        results_files: list[Path],
+        baseline: str | None = None,
+    ) -> CompareResult:
+        """Compare benchmark results.
+
+        Args:
+            results_files: results.json files to load, in order.
+            baseline: Command name of the baseline entry; when None the first
+                entry that was loaded is used.
+
+        Returns:
+            CompareResult holding one Comparison for every entry whose command
+            name differs from the baseline name.
+
+        Raises:
+            ValueError: No files were given, no entries were found, or no entry
+                carries the baseline name.
+            FileNotFoundError: One of the results files does not exist.
+        """
+        if not results_files:
+            raise ValueError("At least one results file is required")
+
+        # Gather entries from every file, keeping file order and entry order.
+        collected: list[BenchmarkEntry] = []
+        for path in results_files:
+            if not path.exists():
+                raise FileNotFoundError(f"Results file not found: {path}")
+
+            logger.info(f"Loading results from: {path}")
+            with open(path) as handle:
+                payload = json.load(handle)
+
+            parsed = self._parse_results(payload)
+            logger.info(f" Found {len(parsed)} entries")
+            collected += parsed
+
+        if not collected:
+            raise ValueError("No benchmark entries found in results files")
+
+        if baseline is None:
+            baseline = collected[0].command
+        logger.info(f"Using baseline: {baseline}")
+
+        # The first entry carrying the baseline name is the reference.
+        reference = next(
+            (item for item in collected if item.command == baseline), None
+        )
+        if reference is None:
+            available = [item.command for item in collected]
+            raise ValueError(
+                f"Baseline '{baseline}' not found. Available: {', '.join(available)}"
+            )
+
+        # Every entry sharing the baseline name is left out of the comparison.
+        comparisons: list[Comparison] = [
+            Comparison(
+                name=item.command,
+                mean=item.mean,
+                baseline_mean=reference.mean,
+                speedup_percent=self._calculate_speedup(reference.mean, item.mean),
+                stddev=item.stddev,
+            )
+            for item in collected
+            if item.command != baseline
+        ]
+
+        logger.info("Comparison results:")
+        logger.info(f" Baseline ({baseline}): {reference.mean:.3f}s")
+        for item in comparisons:
+            prefix = "+" if item.speedup_percent > 0 else ""
+            logger.info(
+                f" {item.name}: {item.mean:.3f}s ({prefix}{item.speedup_percent:.1f}%)"
+            )
+
+        return CompareResult(baseline=baseline, comparisons=comparisons)
+
+    def _parse_results(self, data: dict) -> list[BenchmarkEntry]:
+        """Turn the "results" list of a results.json document into entries.
+
+        Missing numeric keys default to 0, a missing "command" to "unknown",
+        a missing "stddev" to None and a missing "times" to an empty list.
+        """
+        return [
+            BenchmarkEntry(
+                command=item.get("command", "unknown"),
+                mean=item.get("mean", 0),
+                stddev=item.get("stddev"),
+                user=item.get("user", 0),
+                system=item.get("system", 0),
+                min=item.get("min", 0),
+                max=item.get("max", 0),
+                times=item.get("times", []),
+            )
+            for item in data.get("results", [])
+        ]
+
+    def _calculate_speedup(self, baseline_mean: float, other_mean: float) -> float:
+        """Return the speedup of ``other_mean`` over ``baseline_mean`` in percent.
+
+        Positive means faster than the baseline, negative means slower. The
+        value is rounded to one decimal; a zero baseline yields 0.0.
+        """
+        if baseline_mean == 0:
+            return 0.0
+        saved = baseline_mean - other_mean
+        return round((saved / baseline_mean) * 100, 1)
+
+    def to_json(self, result: CompareResult) -> str:
+        """Serialize a CompareResult as an indented JSON document."""
+
+        def as_record(comparison: Comparison) -> dict:
+            return {
+                "name": comparison.name,
+                "mean": comparison.mean,
+                "baseline_mean": comparison.baseline_mean,
+                "speedup_percent": comparison.speedup_percent,
+                "stddev": comparison.stddev,
+            }
+
+        document = {
+            "baseline": result.baseline,
+            "comparisons": [as_record(c) for c in result.comparisons],
+        }
+        return json.dumps(document, indent=2)
diff --git a/bench/config.py b/bench/config.py
new file mode 100644
index 000000000000..7991fee31bff
--- /dev/null
+++ b/bench/config.py
@@ -0,0 +1,231 @@
+"""Configuration management for benchcoin.
+
+Layered configuration (lowest to highest priority):
+1. Built-in defaults
+2. bench.toml config file (base settings)
+3. Selected profile (built-in profile values, then bench.toml profile values)
+4. Environment variables (BENCH_*)
+5. CLI arguments
+"""
+
+from __future__ import annotations
+
+import os
+import tomllib
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+
+# Built-in defaults: the lowest-priority layer that build_config() starts from.
+DEFAULTS = {
+    "chain": "main",
+    "dbcache": 450,
+    "stop_height": 855000,
+    "runs": 3,
+    "connect": "", # Empty = use public P2P network
+    "binaries_dir": "./binaries",
+    "output_dir": "./bench-output",
+}
+
+# Profile overrides: apply_profile() lays the selected entry over the
+# configuration; a profile of the same name in bench.toml is applied after it.
+PROFILES = {
+    "quick": {
+        "stop_height": 1500,
+        "runs": 1,
+    },
+    "full": {
+        "stop_height": 855000,
+        "runs": 3,
+    },
+    "ci": {
+        "stop_height": 855000,
+        "runs": 3,
+        # Fixed peer address; presumably the CI sync node - confirm before changing.
+        "connect": "148.251.128.115:33333",
+    },
+}
+
+# Environment variable mapping: variable name -> configuration key it sets
+# (read by load_env()).
+ENV_MAPPING = {
+    "BENCH_DATADIR": "datadir",
+    "BENCH_TMP_DATADIR": "tmp_datadir",
+    "BENCH_BINARIES_DIR": "binaries_dir",
+    "BENCH_OUTPUT_DIR": "output_dir",
+    "BENCH_STOP_HEIGHT": "stop_height",
+    "BENCH_DBCACHE": "dbcache",
+    "BENCH_CONNECT": "connect",
+    "BENCH_RUNS": "runs",
+    "BENCH_CHAIN": "chain",
+}
+
+
+@dataclass
+class Config:
+    """Benchmark configuration.
+
+    Instances are normally produced by build_config(). Call validate() before
+    use: values that arrive from the environment may still be strings.
+    """
+
+    # Core benchmark settings
+    chain: str = "main"
+    dbcache: int = 450
+    stop_height: int = 855000
+    runs: int = 3
+    connect: str = "" # Empty = use public P2P network
+
+    # Paths
+    datadir: str | None = None
+    tmp_datadir: str | None = None
+    binaries_dir: str = "./binaries"
+    output_dir: str = "./bench-output"
+
+    # Behavior flags
+    instrumented: bool = False
+    skip_existing: bool = False
+    no_cache_drop: bool = False
+    verbose: bool = False
+    dry_run: bool = False
+
+    # Profile used (for reference)
+    profile: str = "full"
+
+    def __post_init__(self) -> None:
+        """Fill in derived values after the dataclass fields are set."""
+        # If tmp_datadir not set, derive from output_dir
+        if self.tmp_datadir is None:
+            self.tmp_datadir = str(Path(self.output_dir) / "tmp-datadir")
+
+        # Instrumented mode forces runs=1
+        if self.instrumented and self.runs != 1:
+            self.runs = 1
+
+    def validate(self) -> list[str]:
+        """Validate the configuration.
+
+        Returns:
+            A list of human-readable error messages; empty when the
+            configuration is usable. Never raises for badly typed numeric
+            settings - they are reported as errors instead.
+        """
+        errors = []
+
+        if self.datadir is None:
+            errors.append("--datadir is required")
+        elif not Path(self.datadir).exists():
+            errors.append(f"datadir does not exist: {self.datadir}")
+
+        # load_env() leaves non-numeric text as a string, so check the type
+        # before comparing; ``"abc" < 1`` would raise TypeError.
+        for name in ("stop_height", "dbcache", "runs"):
+            value = getattr(self, name)
+            if not isinstance(value, int):
+                errors.append(f"{name} must be an integer, got {value!r}")
+            elif value < 1:
+                errors.append(f"{name} must be positive")
+
+        if self.chain not in ("main", "testnet", "signet", "regtest"):
+            errors.append(f"invalid chain: {self.chain}")
+
+        return errors
+
+
+def load_toml(path: Path) -> tuple[dict[str, Any], dict[str, dict[str, Any]]]:
+    """Read base settings and profiles from a TOML config file.
+
+    Args:
+        path: Location of the config file; it may be absent.
+
+    Returns:
+        Tuple of (base_config, profiles_dict). Both are empty when the file
+        does not exist.
+    """
+    if not path.exists():
+        return {}, {}
+
+    with open(path, "rb") as handle:
+        document = tomllib.load(handle)
+
+    # [defaults] is merged first, so [paths] wins on any key both define.
+    flattened: dict[str, Any] = {}
+    for section in ("defaults", "paths"):
+        if section in document:
+            flattened.update(document[section])
+
+    return flattened, document.get("profiles", {})
+
+
+def load_env() -> dict[str, Any]:
+    """Collect configuration overrides from the variables in ENV_MAPPING.
+
+    Returns:
+        A dict keyed by configuration name. Values for stop_height, dbcache
+        and runs are converted to int when they parse; every other value,
+        and any numeric value that does not parse, is returned as a string.
+    """
+    numeric_keys = ("stop_height", "dbcache", "runs")
+    overrides: dict[str, Any] = {}
+
+    for variable, key in ENV_MAPPING.items():
+        raw = os.environ.get(variable)
+        if raw is None:
+            continue
+
+        overrides[key] = raw
+        if key in numeric_keys:
+            try:
+                overrides[key] = int(raw)
+            except ValueError:
+                # Non-numeric text is passed through unchanged as a string.
+                pass
+
+    return overrides
+
+
+def apply_profile(
+    config: dict[str, Any],
+    profile_name: str,
+    toml_profiles: dict[str, dict[str, Any]] | None = None,
+) -> dict[str, Any]:
+    """Return a copy of ``config`` with the named profile laid over it.
+
+    Args:
+        config: Base configuration dict; it is not modified.
+        profile_name: Name of the profile to apply. It is also stored under
+            the "profile" key of the result.
+        toml_profiles: Profiles loaded from the TOML file; their values
+            override the built-in profile of the same name.
+
+    Returns:
+        The merged configuration. A name found in neither source only sets
+        the "profile" key.
+    """
+    merged = {**config, "profile": profile_name}
+
+    # Built-in values first, then the TOML profile so that it takes precedence.
+    if profile_name in PROFILES:
+        merged.update(PROFILES[profile_name])
+    if toml_profiles and profile_name in toml_profiles:
+        merged.update(toml_profiles[profile_name])
+
+    return merged
+
+
+def build_config(
+    cli_args: dict[str, Any] | None = None,
+    config_file: Path | None = None,
+    profile: str = "full",
+) -> Config:
+    """Assemble a Config from every configuration source.
+
+    Sources are applied from lowest to highest priority:
+        1. Built-in defaults
+        2. Config file (bench.toml) base settings
+        3. Built-in profile overrides
+        4. Config file profile overrides
+        5. Environment variables
+        6. CLI arguments
+
+    Args:
+        cli_args: Parsed command-line values; entries that are None are ignored.
+        config_file: TOML file to read; ``bench.toml`` in the working directory
+            when None.
+        profile: Name of the profile to apply.
+
+    Returns:
+        A Config built from the merged settings. Keys that are not Config
+        fields are dropped.
+    """
+    settings = DEFAULTS.copy()
+
+    toml_path = Path("bench.toml") if config_file is None else config_file
+    base_overrides, toml_profiles = load_toml(toml_path)
+    settings.update(base_overrides)
+
+    settings = apply_profile(settings, profile, toml_profiles)
+    settings.update(load_env())
+
+    # A None CLI value means the option was not given, so it must not override.
+    for key, value in (cli_args or {}).items():
+        if value is not None:
+            settings[key] = value
+
+    known_fields = set(Config.__dataclass_fields__)
+    return Config(**{k: v for k, v in settings.items() if k in known_fields})
diff --git a/bench/patchelf.py b/bench/patchelf.py
new file mode 100644
index 000000000000..6da1e00867cf
--- /dev/null
+++ b/bench/patchelf.py
@@ -0,0 +1,135 @@
+"""Patchelf utilities for fixing guix-built binaries on NixOS."""
+
+from __future__ import annotations
+
+import logging
+import os
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+def get_nix_interpreter() -> str | None:
+    """Return the dynamic linker that ``/bin/sh`` uses on a NixOS host.
+
+    Returns:
+        The interpreter path reported by ``patchelf --print-interpreter``,
+        or None when ``/etc/NIXOS`` is absent, patchelf is missing or fails,
+        or the reported path does not exist.
+    """
+    if not Path("/etc/NIXOS").exists():
+        return None
+
+    # The interpreter of the system shell stands in for the host's glibc loader.
+    try:
+        probe = subprocess.run(
+            ["patchelf", "--print-interpreter", "/bin/sh"],
+            capture_output=True,
+            text=True,
+        )
+    except FileNotFoundError:
+        return None
+
+    if probe.returncode != 0:
+        return None
+
+    candidate = probe.stdout.strip()
+    if candidate and Path(candidate).exists():
+        return candidate
+    return None
+
+
+def get_binary_interpreter(binary: Path) -> str | None:
+    """Return the interpreter (dynamic linker) that patchelf reports for ``binary``.
+
+    Returns:
+        The stripped patchelf output, or None when patchelf is not installed
+        or exits with a non-zero status.
+    """
+    try:
+        probe = subprocess.run(
+            ["patchelf", "--print-interpreter", str(binary)],
+            capture_output=True,
+            text=True,
+        )
+    except FileNotFoundError:
+        logger.debug("patchelf not found")
+        return None
+
+    return probe.stdout.strip() if probe.returncode == 0 else None
+
+
+def needs_patching(binary: Path) -> bool:
+    """Decide whether ``binary`` must be patched to run on this host.
+
+    Returns:
+        True only when a nix interpreter was found for the host and the
+        binary reports an interpreter outside ``/nix/store/`` (for example
+        /lib64/ld-linux-x86-64.so.2). False in every other case, including
+        when the binary's interpreter cannot be determined.
+    """
+    if not get_nix_interpreter():
+        # Not on NixOS (or no interpreter found): nothing to patch.
+        return False
+
+    current = get_binary_interpreter(binary)
+    if not current:
+        return False
+
+    return not current.startswith("/nix/store/")
+
+
+def patch_binary(binary: Path) -> bool:
+    """Point ``binary`` at the nix store's dynamic linker using patchelf.
+
+    Returns:
+        True when the binary was patched or needed no patching. False when
+        the nix interpreter cannot be found, patchelf is not installed, or
+        patchelf exits with an error.
+    """
+    if not needs_patching(binary):
+        logger.debug(f"Binary {binary} does not need patching")
+        return True
+
+    target_interp = get_nix_interpreter()
+    if not target_interp:
+        logger.warning("Cannot patch binary: unable to find nix interpreter")
+        return False
+
+    current_interp = get_binary_interpreter(binary)
+    logger.info(f"Patching binary: {binary}")
+    logger.info(f" Original interpreter: {current_interp}")
+    logger.info(f" New interpreter: {target_interp}")
+
+    # patchelf rewrites the file in place, so set mode 0755 first; a failure
+    # here is only logged and patching is still attempted.
+    try:
+        os.chmod(binary, 0o755)
+    except OSError as e:
+        logger.warning(f"Could not make binary writable: {e}")
+
+    command = ["patchelf", "--set-interpreter", target_interp, str(binary)]
+    try:
+        outcome = subprocess.run(command, capture_output=True, text=True)
+    except FileNotFoundError:
+        logger.error("patchelf not found - install it or use nix develop")
+        return False
+
+    if outcome.returncode != 0:
+        logger.error(f"patchelf failed: {outcome.stderr}")
+        return False
+
+    logger.info(" Patching successful")
+    return True
+
+
+def ensure_binary_runnable(binary: Path) -> bool:
+    """Ensure a binary can run on this system.
+
+    Patches the binary if necessary (on NixOS with non-nix binaries).
+
+    Args:
+        binary: Path of the executable to check.
+
+    Returns:
+        True if the binary should be runnable; False when it does not exist
+        or patching failed.
+    """
+    if not binary.exists():
+        logger.error(f"Binary not found: {binary}")
+        return False
+
+    # patch_binary() performs the needs_patching() check itself and returns
+    # True when nothing has to be done. Checking here as well would run every
+    # patchelf probe twice.
+    return patch_binary(binary)
diff --git a/bench/report.py b/bench/report.py
new file mode 100644
index 000000000000..7f95d0c1ea47
--- /dev/null
+++ b/bench/report.py
@@ -0,0 +1,664 @@
+"""Report phase - generate HTML reports from benchmark results.
+
+Ported from the JavaScript logic in .github/workflows/publish-results.yml.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+import shutil
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# HTML template for individual run report.
+# Filled with str.format(): placeholders are {title}, {run_data_rows},
+# {speedup_rows} and {graphs_section}; literal braces must be doubled.
+RUN_REPORT_TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Benchmark Results</title>
+  <style>
+    table {{ border-collapse: collapse; }}
+    th, td {{ border: 1px solid #ccc; padding: 4px 8px; text-align: left; }}
+    .text-green-600 {{ color: #16a34a; }}
+    .text-red-600 {{ color: #dc2626; }}
+  </style>
+</head>
+<body>
+  <h1>Benchmark Results</h1>
+  <h2>{title}</h2>
+
+  <h3>Run Data</h3>
+  <table>
+    <thead>
+      <tr>
+        <th>Network</th>
+        <th>Command</th>
+        <th>Mean (s)</th>
+        <th>Std Dev</th>
+        <th>User (s)</th>
+        <th>System (s)</th>
+      </tr>
+    </thead>
+    <tbody>
+      {run_data_rows}
+    </tbody>
+  </table>
+
+  <h3>Speedup Summary</h3>
+  <table>
+    <thead>
+      <tr>
+        <th>Network</th>
+        <th>Speedup (%)</th>
+      </tr>
+    </thead>
+    <tbody>
+      {speedup_rows}
+    </tbody>
+  </table>
+
+  {graphs_section}
+</body>
+</html>
+"""
+
+# HTML template for main index.
+# Filled with str.format(); {run_list} receives the per-PR list of runs.
+INDEX_TEMPLATE = """<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Bitcoin Benchmark Results</title>
+</head>
+<body>
+  <h1>Bitcoin Benchmark Results</h1>
+  {run_list}
+</body>
+</html>
+"""
+
+
+@dataclass
+class BenchmarkRun:
+    """Parsed benchmark run data.
+
+    One instance is built per item of a results.json "results" list.
+    """
+
+    network: str  # network label; "default" when a results item carries none
+    command: str  # entry name, e.g. "base" or "head"
+    mean: float  # shown under the "Mean (s)" column of the report
+    stddev: float | None  # None when the results item has no "stddev" key
+    user: float
+    system: float
+    parameters: dict[str, Any] = field(default_factory=dict)  # copied from the "parameters" key
+
+
+@dataclass
+class ReportResult:
+    """Result of report generation."""
+
+    output_dir: Path  # directory the report was written to
+    index_file: Path  # the generated index.html inside output_dir
+    speedups: dict[str, float]  # speedup percentage keyed by command or network name
+
+
+class ReportGenerator:
+    """Generate HTML reports from benchmark results.
+
+    Text taken from results files or directory names (network names, command
+    names, titles, run names) is HTML-escaped before it is placed in a page.
+    """
+
+    def __init__(
+        self, repo_url: str = "https://github.com/bitcoin-dev-tools/benchcoin"
+    ):
+        # Base URL used to build commit links in _linkify_commit().
+        self.repo_url = repo_url
+
+    @staticmethod
+    def _esc(value: object) -> str:
+        """HTML-escape ``value`` (quotes included) for element text or attributes."""
+        # Imported locally because several methods bind a local named ``html``.
+        from html import escape
+
+        return escape(str(value), quote=True)
+
+    @staticmethod
+    def _relative_href(target: Path, page: Path) -> str:
+        """Return ``target`` as a POSIX path relative to the directory of ``page``.
+
+        Falls back to ``target`` itself when it does not live below that
+        directory.
+        """
+        try:
+            return target.relative_to(page.parent).as_posix()
+        except ValueError:
+            return target.as_posix()
+
+    def generate_multi_network(
+        self,
+        network_dirs: dict[str, Path],
+        output_dir: Path,
+        title: str = "Benchmark Results",
+        pr_number: str | None = None,
+        run_id: str | None = None,
+    ) -> ReportResult:
+        """Generate HTML report from multiple network benchmark results.
+
+        Args:
+            network_dirs: Dict mapping network name to directory containing results.json
+            output_dir: Where to write the HTML report
+            title: Title for the report
+            pr_number: PR number (for CI reports)
+            run_id: Run ID (for CI reports)
+
+        Returns:
+            ReportResult with paths and speedup data
+
+        Raises:
+            ValueError: None of the network directories produced a result.
+        """
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Combine results from all networks
+        all_runs: list[BenchmarkRun] = []
+        for network, input_dir in network_dirs.items():
+            results_file = input_dir / "results.json"
+            if not results_file.exists():
+                logger.warning(
+                    f"results.json not found in {input_dir} for network {network}"
+                )
+                continue
+
+            with open(results_file) as f:
+                data = json.load(f)
+
+            # Parse and add network to each run
+            for result in data.get("results", []):
+                all_runs.append(
+                    BenchmarkRun(
+                        network=network,
+                        command=result.get("command", ""),
+                        mean=result.get("mean", 0),
+                        stddev=result.get("stddev"),
+                        user=result.get("user", 0),
+                        system=result.get("system", 0),
+                        parameters=result.get("parameters", {}),
+                    )
+                )
+
+            # Copy artifacts from this network (prefixed with the network name)
+            self._copy_network_artifacts(network, input_dir, output_dir)
+
+        if not all_runs:
+            raise ValueError("No benchmark results found in any network directory")
+
+        # Calculate speedups per network
+        speedups = self._calculate_speedups_per_network(all_runs)
+
+        # The PR/run title replaces ``title`` only when both values are given.
+        full_title = title
+        if pr_number and run_id:
+            full_title = f"PR #{pr_number} - Run {run_id}"
+
+        # Artifacts were copied into output_dir above, so it serves as both
+        # the input and the output directory for the graphs lookup.
+        html = self._generate_html(
+            all_runs, speedups, full_title, output_dir, output_dir
+        )
+
+        # Write report
+        index_file = output_dir / "index.html"
+        index_file.write_text(html)
+        logger.info(f"Generated report: {index_file}")
+
+        # Write combined results.json
+        combined_results = {
+            "results": [
+                {
+                    "network": run.network,
+                    "command": run.command,
+                    "mean": run.mean,
+                    "stddev": run.stddev,
+                    "user": run.user,
+                    "system": run.system,
+                }
+                for run in all_runs
+            ],
+            "speedups": speedups,
+        }
+        combined_file = output_dir / "results.json"
+        combined_file.write_text(json.dumps(combined_results, indent=2))
+
+        return ReportResult(
+            output_dir=output_dir,
+            index_file=index_file,
+            speedups=speedups,
+        )
+
+    def generate(
+        self,
+        input_dir: Path,
+        output_dir: Path,
+        title: str = "Benchmark Results",
+    ) -> ReportResult:
+        """Generate HTML report from benchmark artifacts.
+
+        Args:
+            input_dir: Directory containing results.json and artifacts
+            output_dir: Where to write the HTML report
+            title: Title for the report
+
+        Returns:
+            ReportResult with paths and speedup data
+
+        Raises:
+            FileNotFoundError: ``input_dir`` has no results.json.
+        """
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Load results.json
+        results_file = input_dir / "results.json"
+        if not results_file.exists():
+            raise FileNotFoundError(f"results.json not found in {input_dir}")
+
+        with open(results_file) as f:
+            data = json.load(f)
+
+        runs = self._parse_results(data)
+        speedups = self._calculate_speedups(runs)
+        html = self._generate_html(runs, speedups, title, input_dir, output_dir)
+
+        # Write report
+        index_file = output_dir / "index.html"
+        index_file.write_text(html)
+        logger.info(f"Generated report: {index_file}")
+
+        # Copy artifacts (flamegraphs, plots)
+        self._copy_artifacts(input_dir, output_dir)
+
+        return ReportResult(
+            output_dir=output_dir,
+            index_file=index_file,
+            speedups=speedups,
+        )
+
+    def generate_index(
+        self,
+        results_dir: Path,
+        output_file: Path,
+    ) -> None:
+        """Generate main index.html listing all available results.
+
+        Args:
+            results_dir: Directory containing pr-* subdirectories
+            output_file: Where to write index.html
+        """
+        # (pr number, pr directory, run directory names)
+        runs: list[tuple[str, Path, list[str]]] = []
+
+        if results_dir.exists():
+            for pr_dir in results_dir.iterdir():
+                if not (pr_dir.is_dir() and pr_dir.name.startswith("pr-")):
+                    continue
+                pr_runs = sorted(d.name for d in pr_dir.iterdir() if d.is_dir())
+                if pr_runs:
+                    # removeprefix() strips only the leading "pr-".
+                    runs.append((pr_dir.name.removeprefix("pr-"), pr_dir, pr_runs))
+
+        # Numeric PR numbers sort by value (9 before 10); anything else follows
+        # in lexicographic order.
+        runs.sort(
+            key=lambda item: (0, int(item[0]), "")
+            if item[0].isdecimal()
+            else (1, 0, item[0])
+        )
+
+        run_list_html = ""
+        for pr_num, pr_dir, pr_runs in runs:
+            links = []
+            for run in pr_runs:
+                href = self._relative_href(pr_dir / run / "index.html", output_file)
+                links.append(
+                    f'<li><a href="{self._esc(href)}">Run {self._esc(run)}</a></li>'
+                )
+            run_links = "\n".join(links)
+            run_list_html += f"""
+            <section>
+              <h2>PR #{self._esc(pr_num)}</h2>
+              <ul>
+                {run_links}
+              </ul>
+            </section>
+            """
+
+        html = INDEX_TEMPLATE.format(run_list=run_list_html)
+        output_file.write_text(html)
+        logger.info(f"Generated index: {output_file}")
+
+    def _parse_results(self, data: dict) -> list[BenchmarkRun]:
+        """Parse results from hyperfine JSON output.
+
+        Accepts both direct hyperfine output and the combined results format
+        written by generate_multi_network(); items without a "network" key
+        are assigned the network "default".
+        """
+        runs = []
+
+        for result in data.get("results", []):
+            runs.append(
+                BenchmarkRun(
+                    network=result.get("network", "default"),
+                    command=result.get("command", ""),
+                    mean=result.get("mean", 0),
+                    stddev=result.get("stddev"),
+                    user=result.get("user", 0),
+                    system=result.get("system", 0),
+                    parameters=result.get("parameters", {}),
+                )
+            )
+
+        return runs
+
+    def _calculate_speedups(self, runs: list[BenchmarkRun]) -> dict[str, float]:
+        """Calculate speedup percentages.
+
+        Uses the first entry as baseline and compares all others against it.
+        Returns a dict mapping command name to speedup percentage (rounded to
+        one decimal). The dict is empty when there are fewer than two runs or
+        the baseline mean is not positive.
+        """
+        speedups: dict[str, float] = {}
+
+        if len(runs) < 2:
+            return speedups
+
+        baseline_mean = runs[0].mean
+        if baseline_mean <= 0:
+            return speedups
+
+        for run in runs[1:]:
+            speedup = ((baseline_mean - run.mean) / baseline_mean) * 100
+            speedups[run.command] = round(speedup, 1)
+
+        return speedups
+
+    def _calculate_speedups_per_network(
+        self, runs: list[BenchmarkRun]
+    ) -> dict[str, float]:
+        """Calculate speedup percentages per network.
+
+        For each network, uses 'base' as baseline and calculates speedup for 'head'.
+        Returns a dict mapping network name to speedup percentage. Networks
+        that lack either run, or whose means are zero, are left out.
+        """
+        speedups: dict[str, float] = {}
+
+        # Group runs by network
+        networks: dict[str, list[BenchmarkRun]] = {}
+        for run in runs:
+            networks.setdefault(run.network, []).append(run)
+
+        for network, network_runs in networks.items():
+            base_mean = None
+            head_mean = None
+
+            for run in network_runs:
+                if run.command == "base":
+                    base_mean = run.mean
+                elif run.command == "head":
+                    head_mean = run.mean
+
+            # A mean of 0 is also what a missing "mean" key defaults to, so
+            # zero values are deliberately treated as "no data".
+            if base_mean and head_mean and base_mean > 0:
+                speedup = ((base_mean - head_mean) / base_mean) * 100
+                speedups[network] = round(speedup, 1)
+
+        return speedups
+
+    def _copy_network_artifacts(
+        self, network: str, input_dir: Path, output_dir: Path
+    ) -> None:
+        """Copy artifacts from a network directory with network prefix."""
+        # Flamegraphs become {network}-{original name}
+        for svg in input_dir.glob("*-flamegraph.svg"):
+            dest = output_dir / f"{network}-{svg.name}"
+            shutil.copy2(svg, dest)
+            logger.debug(f"Copied {svg.name} as {dest.name}")
+
+        # The plots directory becomes {network}-plots, replacing any old copy
+        plots_dir = input_dir / "plots"
+        if plots_dir.exists():
+            dest_plots = output_dir / f"{network}-plots"
+            if dest_plots.exists():
+                shutil.rmtree(dest_plots)
+            shutil.copytree(plots_dir, dest_plots)
+            logger.debug(f"Copied plots to {dest_plots}")
+
+    def _generate_html(
+        self,
+        runs: list[BenchmarkRun],
+        speedups: dict[str, float],
+        title: str,
+        input_dir: Path,
+        output_dir: Path,
+    ) -> str:
+        """Generate the HTML report.
+
+        Args:
+            runs: Runs to list in the "Run Data" table.
+            speedups: Name -> speedup percentage for the summary table.
+            title: Report title (escaped before insertion).
+            input_dir: Directory searched for non-prefixed artifacts.
+            output_dir: Directory searched for network-prefixed artifacts.
+
+        Returns:
+            The complete page produced from RUN_REPORT_TEMPLATE.
+        """
+        # Sort runs by network then by command (base first)
+        sorted_runs = sorted(
+            runs,
+            key=lambda r: (r.network, 0 if "base" in r.command.lower() else 1),
+        )
+
+        run_data_rows = ""
+        for run in sorted_runs:
+            # Escaped command text with any "(commit)" turned into a link
+            command_html = self._linkify_commit(run.command)
+
+            # 0.0 is a valid deviation; only a missing value is shown as N/A.
+            stddev_str = f"{run.stddev:.3f}" if run.stddev is not None else "N/A"
+
+            run_data_rows += f"""
+            <tr>
+              <td>{self._esc(run.network)}</td>
+              <td>{command_html}</td>
+              <td>{run.mean:.3f}</td>
+              <td>{stddev_str}</td>
+              <td>{run.user:.3f}</td>
+              <td>{run.system:.3f}</td>
+            </tr>
+            """
+
+        speedup_rows = ""
+        if sorted_runs:
+            # The first sorted run is presented as the baseline row
+            baseline = sorted_runs[0]
+            speedup_rows += f"""
+            <tr>
+              <td>{self._esc(baseline.command)} (baseline)</td>
+              <td>-</td>
+            </tr>
+            """
+            for name, speedup in speedups.items():
+                color_class = ""
+                if speedup > 0:
+                    color_class = "text-green-600"
+                elif speedup < 0:
+                    color_class = "text-red-600"
+
+                sign = "+" if speedup > 0 else ""
+                speedup_rows += f"""
+            <tr>
+              <td>{self._esc(name)}</td>
+              <td class="{color_class}">{sign}{speedup}%</td>
+            </tr>
+            """
+
+        graphs_section = self._generate_graphs_section(runs, input_dir, output_dir)
+
+        return RUN_REPORT_TEMPLATE.format(
+            title=self._esc(title),
+            run_data_rows=run_data_rows,
+            speedup_rows=speedup_rows,
+            graphs_section=graphs_section,
+        )
+
+    def _linkify_commit(self, command: str) -> str:
+        """Escape ``command`` for HTML and link any "(<commit hash>)" it contains.
+
+        A hash is 7 to 40 lowercase hex characters inside parentheses; the
+        link text is its first 8 characters and the target is
+        ``{repo_url}/commit/{hash}``.
+        """
+        repo = self._esc(self.repo_url.rstrip("/"))
+
+        def replace_commit(match: re.Match) -> str:
+            commit = match.group(1)
+            return f'(<a href="{repo}/commit/{commit}">{commit[:8]}</a>)'
+
+        # Escaping never introduces parentheses or bare hex runs, so it is safe
+        # to escape first and substitute afterwards.
+        return re.sub(r"\(([a-f0-9]{7,40})\)", replace_commit, self._esc(command))
+
+    def _generate_graphs_section(
+        self,
+        runs: list[BenchmarkRun],
+        input_dir: Path,
+        output_dir: Path,
+    ) -> str:
+        """Generate the flamegraphs and plots section.
+
+        Returns an empty string when no run has a flamegraph or a plot.
+        """
+        graphs_html = ""
+
+        for run in runs:
+            # Use the command/name directly (e.g., "base", "head")
+            name = run.command
+            network = run.network
+
+            # Flamegraph: {network}-{name}-flamegraph.svg in output_dir
+            # (multi-network reports) wins over {name}-flamegraph.svg in
+            # input_dir (single-network reports).
+            flamegraph_name = None
+            network_prefixed = f"{network}-{name}-flamegraph.svg"
+            non_prefixed = f"{name}-flamegraph.svg"
+            if (output_dir / network_prefixed).exists():
+                flamegraph_name = network_prefixed
+            elif (input_dir / non_prefixed).exists():
+                flamegraph_name = non_prefixed
+
+            # Plots: the {network}-plots directory wins over plain "plots".
+            plots_dir = None
+            for candidate in (output_dir / f"{network}-plots", input_dir / "plots"):
+                if candidate.exists():
+                    plots_dir = candidate
+                    break
+
+            plot_files: list[str] = []
+            if plots_dir is not None:
+                plot_files = sorted(
+                    p.name
+                    for p in plots_dir.iterdir()
+                    if p.name.startswith(f"{name}-") and p.suffix == ".png"
+                )
+
+            if not flamegraph_name and not plot_files:
+                continue
+
+            display_label = f"{network} - {name}" if network != "default" else name
+
+            graphs_html += f"""
+            <div>
+              <h3>{self._esc(display_label)}</h3>
+            """
+
+            if flamegraph_name:
+                graphs_html += f"""
+              <object data="{self._esc(flamegraph_name)}" type="image/svg+xml"></object>
+            """
+
+            if plot_files and plots_dir is not None:
+                # Plots are referenced relative to the report, by directory name
+                plots_rel_path = self._esc(plots_dir.name)
+                for plot in plot_files:
+                    plot_name = self._esc(plot)
+                    graphs_html += f"""
+              <a href="{plots_rel_path}/{plot_name}">
+                <img src="{plots_rel_path}/{plot_name}" alt="{plot_name}">
+              </a>
+            """
+
+            graphs_html += "</div>\n"
+
+        if graphs_html:
+            return f"""
+            <h3>Flamegraphs and Plots</h3>
+            {graphs_html}
+            """
+
+        return ""
+
+    def _copy_artifacts(self, input_dir: Path, output_dir: Path) -> None:
+        """Copy flamegraphs and plots to output directory."""
+        # Skip if input and output are the same directory
+        if input_dir.resolve() == output_dir.resolve():
+            logger.debug("Input and output are the same directory, skipping copy")
+            return
+
+        # Copy flamegraphs
+        for svg in input_dir.glob("*-flamegraph.svg"):
+            shutil.copy2(svg, output_dir / svg.name)
+            logger.debug(f"Copied {svg.name}")
+
+        # Copy plots directory, replacing any previous copy
+        plots_dir = input_dir / "plots"
+        if plots_dir.exists():
+            dest_plots = output_dir / "plots"
+            if dest_plots.exists():
+                shutil.rmtree(dest_plots)
+            shutil.copytree(plots_dir, dest_plots)
+            logger.debug("Copied plots directory")
+
+
+class ReportPhase:
+    """Phase wrapper that forwards report requests to a ReportGenerator."""
+
+    def __init__(
+        self, repo_url: str = "https://github.com/bitcoin-dev-tools/benchcoin"
+    ):
+        # The generator does all the work; this class only delegates to it.
+        self.generator = ReportGenerator(repo_url)
+
+    def run(
+        self,
+        input_dir: Path,
+        output_dir: Path,
+        title: str = "Benchmark Results",
+    ) -> ReportResult:
+        """Build a report for a single results directory.
+
+        Args:
+            input_dir: Directory containing results.json and artifacts
+            output_dir: Where to write the HTML report
+            title: Title for the report
+
+        Returns:
+            ReportResult with paths and speedup data
+        """
+        generator = self.generator
+        return generator.generate(input_dir, output_dir, title)
+
+    def run_multi_network(
+        self,
+        network_dirs: dict[str, Path],
+        output_dir: Path,
+        title: str = "Benchmark Results",
+        pr_number: str | None = None,
+        run_id: str | None = None,
+    ) -> ReportResult:
+        """Build one combined report from several per-network result directories.
+
+        Args:
+            network_dirs: Dict mapping network name to directory containing results.json
+            output_dir: Where to write the HTML report
+            title: Title for the report
+            pr_number: PR number (for CI reports)
+            run_id: Run ID (for CI reports)
+
+        Returns:
+            ReportResult with paths and speedup data
+        """
+        generator = self.generator
+        report = generator.generate_multi_network(
+            network_dirs, output_dir, title, pr_number, run_id
+        )
+        return report
+
+    def update_index(self, results_dir: Path, output_file: Path) -> None:
+        """Regenerate the main index.html that lists all results.
+
+        Args:
+            results_dir: Directory containing pr-* subdirectories
+            output_file: Where to write index.html
+        """
+        generator = self.generator
+        generator.generate_index(results_dir, output_file)
diff --git a/bench/utils.py b/bench/utils.py
new file mode 100644
index 000000000000..df454cf0644e
--- /dev/null
+++ b/bench/utils.py
@@ -0,0 +1,105 @@
+"""Utility functions for git operations."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class GitState:
+    """Saved git state for restoration after operations.
+
+    Call save() before changing the checkout and restore() afterwards. The
+    state holds either a branch name or, for a detached HEAD, a commit hash.
+    """
+
+    def __init__(self, repo_path: Path | None = None):
+        # Repository to operate on; the current working directory when omitted.
+        self.repo_path = repo_path or Path.cwd()
+        self.original_branch: str | None = None
+        self.original_commit: str | None = None
+        self.was_detached: bool = False
+
+    def save(self) -> None:
+        """Save current git state.
+
+        Raises:
+            subprocess.CalledProcessError: HEAD is detached and
+                ``git rev-parse HEAD`` fails.
+        """
+        # Forget anything recorded by an earlier save(); otherwise a stale
+        # branch name would take precedence in restore().
+        self.original_branch = None
+        self.original_commit = None
+        self.was_detached = False
+
+        # symbolic-ref succeeds only when HEAD points at a branch.
+        result = subprocess.run(
+            ["git", "symbolic-ref", "--short", "HEAD"],
+            capture_output=True,
+            text=True,
+            cwd=self.repo_path,
+        )
+
+        if result.returncode == 0:
+            self.original_branch = result.stdout.strip()
+        else:
+            # Detached HEAD - save commit hash
+            result = subprocess.run(
+                ["git", "rev-parse", "HEAD"],
+                capture_output=True,
+                text=True,
+                check=True,
+                cwd=self.repo_path,
+            )
+            self.original_commit = result.stdout.strip()
+            self.was_detached = True
+
+        logger.debug(
+            f"Saved git state: branch={self.original_branch}, "
+            f"commit={self.original_commit}, detached={self.was_detached}"
+        )
+
+    def restore(self) -> None:
+        """Restore saved git state.
+
+        Does nothing when save() has not recorded anything.
+
+        Raises:
+            subprocess.CalledProcessError: ``git checkout`` fails.
+        """
+        if self.original_branch:
+            logger.debug(f"Restoring branch: {self.original_branch}")
+            target = self.original_branch
+        elif self.original_commit:
+            logger.debug(f"Restoring detached HEAD: {self.original_commit}")
+            target = self.original_commit
+        else:
+            return
+
+        subprocess.run(
+            ["git", "checkout", target],
+            check=True,
+            cwd=self.repo_path,
+        )
+
+
+class GitError(Exception):
+    """Raised when a git command run by this module exits unsuccessfully."""
+
+
+def git_checkout(commit: str, repo_path: Path | None = None) -> None:
+    """Run ``git checkout <commit>`` in the repository.
+
+    Args:
+        commit: Commit hash or ref to check out.
+        repo_path: Repository directory; the current working directory when
+            omitted.
+
+    Raises:
+        GitError: git exited with a non-zero status.
+    """
+    workdir = repo_path or Path.cwd()
+    # Only the first 12 characters are logged to keep the line short.
+    logger.info(f"Checking out {commit[:12]}")
+
+    outcome = subprocess.run(
+        ["git", "checkout", commit],
+        cwd=workdir,
+        capture_output=True,
+        text=True,
+    )
+    if outcome.returncode == 0:
+        return
+
+    raise GitError(f"Failed to checkout {commit}: {outcome.stderr}")
+
+
+def git_rev_parse(ref: str, repo_path: Path | None = None) -> str:
+    """Resolve a git reference to a full commit hash.
+
+    Args:
+        ref: Branch, tag, abbreviated hash or other revision expression.
+        repo_path: Repository directory; the current working directory when
+            omitted.
+
+    Returns:
+        The hash of the commit that ``ref`` names.
+
+    Raises:
+        GitError: ``ref`` does not name exactly one commit.
+    """
+    repo_path = repo_path or Path.cwd()
+
+    # ``^{commit}`` peels annotated tags to the commit they point at, and
+    # ``--verify`` rejects anything that is not a single valid object (plain
+    # rev-parse would print several hashes for a range such as ``a..b``).
+    result = subprocess.run(
+        ["git", "rev-parse", "--verify", f"{ref}^{{commit}}"],
+        cwd=repo_path,
+        capture_output=True,
+        text=True,
+    )
+
+    if result.returncode != 0:
+        raise GitError(f"Failed to resolve {ref}: {result.stderr}")
+
+    return result.stdout.strip()
diff --git a/contrib/guix/libexec/build.sh b/contrib/guix/libexec/build.sh
index 48301841841d..512530e21ba8 100755
--- a/contrib/guix/libexec/build.sh
+++ b/contrib/guix/libexec/build.sh
@@ -142,10 +142,10 @@ export GUIX_LD_WRAPPER_DISABLE_RPATH=yes
# Determine the correct value for -Wl,--dynamic-linker for the current $HOST
case "$HOST" in
+ x86_64-linux-gnu) ;;
*linux*)
glibc_dynamic_linker=$(
case "$HOST" in
- x86_64-linux-gnu) echo /lib64/ld-linux-x86-64.so.2 ;;
arm-linux-gnueabihf) echo /lib/ld-linux-armhf.so.3 ;;
aarch64-linux-gnu) echo /lib/ld-linux-aarch64.so.1 ;;
riscv64-linux-gnu) echo /lib/ld-linux-riscv64-lp64d.so.1 ;;
@@ -178,7 +178,8 @@ make -C depends --jobs="$JOBS" HOST="$HOST" \
x86_64_linux_AR=x86_64-linux-gnu-gcc-ar \
x86_64_linux_RANLIB=x86_64-linux-gnu-gcc-ranlib \
x86_64_linux_NM=x86_64-linux-gnu-gcc-nm \
- x86_64_linux_STRIP=x86_64-linux-gnu-strip
+ x86_64_linux_STRIP=x86_64-linux-gnu-strip \
+ NO_QT=1 # Don't bother with static
case "$HOST" in
*darwin*)
@@ -225,6 +226,7 @@ esac
# LDFLAGS
case "$HOST" in
+ x86_64-linux-gnu) HOST_LDFLAGS=" -static-pie -static-libgcc -Wl,-O2" ;;
*linux*) HOST_LDFLAGS="-Wl,--as-needed -Wl,--dynamic-linker=$glibc_dynamic_linker -Wl,-O2" ;;
*mingw*) HOST_LDFLAGS="-Wl,--no-insert-timestamp" ;;
esac
diff --git a/contrib/guix/manifest.scm b/contrib/guix/manifest.scm
index aad03a20f5e6..de27ff44d4f3 100644
--- a/contrib/guix/manifest.scm
+++ b/contrib/guix/manifest.scm
@@ -495,6 +495,37 @@ inspecting signatures in Mach-O binaries.")
(("^install-others =.*$")
(string-append "install-others = " out "/etc/rpc\n")))))))))))))
+(define-public glibc-2.42
+ (let ((commit "71874f167aa5bb1538ff7e394beaacee28ebe65f"))
+ (package
+ (inherit glibc) ;; 2.39
+ (version "2.42")
+ (source (origin
+ (method git-fetch)
+ (uri (git-reference
+ (url "https://sourceware.org/git/glibc.git")
+ (commit commit)))
+ (file-name (git-file-name "glibc" commit))
+ (sha256
+ (base32
+ "1pfbk907fkbavg7grbvb5zlhd3y47f8jj3d2v1s5w7xjnn0ypigq"))
+ (patches (search-our-patches "glibc-2.42-guix-prefix.patch"))))
+ (arguments
+ (substitute-keyword-arguments (package-arguments glibc)
+ ((#:configure-flags flags)
+ `(append ,flags
+ ;; https://www.gnu.org/software/libc/manual/html_node/Configuring-and-compiling.html
+ (list "--enable-stack-protector=all",
+ "--enable-bind-now",
+ "--enable-fortify-source",
+ "--enable-cet=yes",
+ "--enable-nscd=no",
+ "--enable-static-nss=yes",
+ "--disable-timezone-tools",
+ "--disable-profile",
+ "--disable-werror",
+ building-on))))))))
+
;; The sponge tool from moreutils.
(define-public sponge
(package
@@ -563,6 +594,10 @@ inspecting signatures in Mach-O binaries.")
nsis-x86_64
nss-certs
osslsigncode))
+ ((string-contains target "x86_64-linux-")
+ (list (list gcc-toolchain-13 "static")
+ (make-bitcoin-cross-toolchain target
+ #:base-libc glibc-2.42)))
((string-contains target "-linux-")
(list bison
pkg-config
diff --git a/contrib/guix/patches/glibc-2.42-guix-prefix.patch b/contrib/guix/patches/glibc-2.42-guix-prefix.patch
new file mode 100644
index 000000000000..9111fb5b476a
--- /dev/null
+++ b/contrib/guix/patches/glibc-2.42-guix-prefix.patch
@@ -0,0 +1,47 @@
+Without -ffile-prefix-map, the debug symbols will contain paths for the
+guix store which will include the hashes of each package. However, the
+hash for the same package will differ when on different architectures.
+In order to be reproducible regardless of the architecture used to build
+the package, map all guix store prefixes to something fixed, e.g. /usr.
+
+--- a/Makeconfig
++++ b/Makeconfig
+@@ -1074,6 +1074,10 @@ CPPFLAGS-.o = $(pic-default)
+ CFLAGS-.o = $(filter %frame-pointer,$(+cflags)) $(pie-default)
+ CFLAGS-.o += $(call elide-fortify-source,.o,$(routines_no_fortify))
+ CFLAGS-.o += $(call elide-fortify-source,_chk.o,$(routines_no_fortify))
++
++# Map Guix store paths to /usr
++CFLAGS-.o += `find /gnu/store -maxdepth 1 -mindepth 1 -type d -exec echo -n " -ffile-prefix-map={}=/usr" \;`
++
+ libtype.o := lib%.a
+ object-suffixes += .o
+ ifeq (yes,$(build-shared))
+diff --git a/iconv/Makefile b/iconv/Makefile
+index afb3fb7bdb..5acee345e0 100644
+--- a/iconv/Makefile
++++ b/iconv/Makefile
+@@ -65,6 +65,9 @@ CFLAGS-gconv_cache.c += -DGCONV_DIR='"$(gconvdir)"'
+ CFLAGS-gconv_conf.c += -DGCONV_PATH='"$(gconvdir)"'
+ CFLAGS-iconvconfig.c += -DGCONV_PATH='"$(gconvdir)"' -DGCONV_DIR='"$(gconvdir)"'
+
++# Map Guix store paths to /usr
++CFLAGS-.c += `find /gnu/store -maxdepth 1 -mindepth 1 -type d -exec echo -n " -ffile-prefix-map={}=/usr" \;`
++
+ # Set libof-* for each routine.
+ cpp-srcs-left := $(iconv_prog-modules) $(iconvconfig-modules)
+ lib := iconvprogs
+diff --git a/posix/Makefile b/posix/Makefile
+index 3d368b91f6..d79d8fb648 100644
+--- a/posix/Makefile
++++ b/posix/Makefile
+@@ -590,6 +590,9 @@ CFLAGS-execlp.os = -fomit-frame-pointer
+ CFLAGS-nanosleep.c += -fexceptions -fasynchronous-unwind-tables
+ CFLAGS-fork.c = $(libio-mtsafe) $(config-cflags-wno-ignored-attributes)
+
++# Map Guix store paths to /usr
++CFLAGS-.c += `find /gnu/store -maxdepth 1 -mindepth 1 -type d -exec echo -n " -ffile-prefix-map={}=/usr" \;`
++
+ tstgetopt-ARGS = -a -b -cfoobar --required foobar --optional=bazbug \
+ --none random --col --color --colour
+
diff --git a/contrib/guix/security-check.py b/contrib/guix/security-check.py
index be2e0cfbe2af..ac943e33aabd 100755
--- a/contrib/guix/security-check.py
+++ b/contrib/guix/security-check.py
@@ -122,6 +122,10 @@ def check_ELF_CONTROL_FLOW(binary) -> bool:
return False
def check_ELF_FORTIFY(binary) -> bool:
+ # no imported fortified funcs if we are fully static
+ # check could be changed to include all symbols
+ if binary.header.machine_type == lief.ELF.ARCH.X86_64:
+ return True
# bitcoin wrapper does not currently contain any fortified functions
if '--monolithic' in binary.strings:
diff --git a/contrib/guix/symbol-check.py b/contrib/guix/symbol-check.py
index 27483aa03756..71d4743d5823 100755
--- a/contrib/guix/symbol-check.py
+++ b/contrib/guix/symbol-check.py
@@ -29,7 +29,7 @@
MAX_VERSIONS = {
'GLIBC': {
- lief.ELF.ARCH.X86_64: (2,31),
+ lief.ELF.ARCH.X86_64: (0,0),
lief.ELF.ARCH.ARM: (2,31),
lief.ELF.ARCH.AARCH64:(2,31),
lief.ELF.ARCH.PPC64: (2,31),
@@ -40,14 +40,14 @@
# Ignore symbols that are exported as part of every executable
IGNORE_EXPORTS = {
'environ', '_environ', '__environ', '_fini', '_init', 'stdin',
-'stdout', 'stderr',
+'stdout', 'stderr', '__libc_single_threaded',
}
# Expected linker-loader names can be found here:
# https://sourceware.org/glibc/wiki/ABIList?action=recall&rev=16
ELF_INTERPRETER_NAMES: dict[lief.ELF.ARCH, dict[lief.Header.ENDIANNESS, str]] = {
lief.ELF.ARCH.X86_64: {
- lief.Header.ENDIANNESS.LITTLE: "/lib64/ld-linux-x86-64.so.2",
+ lief.Header.ENDIANNESS.LITTLE: "",
},
lief.ELF.ARCH.ARM: {
lief.Header.ENDIANNESS.LITTLE: "/lib/ld-linux-armhf.so.3",
@@ -89,7 +89,6 @@
'libc.so.6', # C library
'libpthread.so.0', # threading
'libm.so.6', # math library
-'ld-linux-x86-64.so.2', # 64-bit dynamic linker
'ld-linux.so.2', # 32-bit dynamic linker
'ld-linux-aarch64.so.1', # 64-bit ARM dynamic linker
'ld-linux-armhf.so.3', # 32-bit ARM dynamic linker
@@ -209,6 +208,10 @@ def check_RUNPATH(binary) -> bool:
def check_ELF_libraries(binary) -> bool:
ok: bool = True
+
+ if binary.header.machine_type == lief.ELF.ARCH.X86_64:
+ return len(binary.libraries) == 0
+
for library in binary.libraries:
if library not in ELF_ALLOWED_LIBRARIES:
print(f'{filename}: {library} is not in ALLOWED_LIBRARIES!')
diff --git a/doc/benchcoin.md b/doc/benchcoin.md
new file mode 100644
index 000000000000..0b4159256c95
--- /dev/null
+++ b/doc/benchcoin.md
@@ -0,0 +1,127 @@
+# benchcoin
+
+A Bitcoin Core benchmarking fork
+
+This repository is a fork of Bitcoin Core that performs automated IBD benchmarking.
+It allows you to measure and compare the performance impact of certain types of changes to Bitcoin Core's codebase on a longer-running IBD benchmark, in a (pretty) reproducible fashion.
+
+## Features
+
+- Automated IBD benchmarking on pull requests
+- Multiple configurations:
+ - Mainnet with default cache
+ - Mainnet with large cache
+- Performance visualizations including:
+ - Flamegraphs for CPU profiling
+ - Time series plots of various metrics
+ - Compare `base` (bitcoin/bitcoin:master) and `head` (PR)
+
+## Example Flamegraph
+
+Below is an example flamegraph showing CPU utilization during IBD:
+
+![Example flamegraph](../doc/flamegraph.svg)
+
+## How to use it
+
+1. Open a Pull Request against **this repo**
+2. Wait for the bot to comment on your PR after it's finished.
+
+See the [Contributing](#contributing) section for more details.
+
+## How it works
+
+When you open a pull request against this repository:
+
+1. The CI workflow automatically builds both the base and PR versions of bitcoind
+2. Runs IBD benchmarks
+3. Records performance metrics and creates various visualizations
+4. Posts results as a comment on your PR
+
+The benchmarks test two configurations:
+- Mainnet-default: with default (450 MB) dbcache
+ - From a pruned datadir @ height 840,000 to height 855,000
+- Mainnet-large: with 32000 MB dbcache
+ - From a pruned datadir @ height 840,000 to height 855,000
+
+## Benchmark Outputs
+
+For each benchmark run, you'll get a GitHub Pages page with:
+
+- Timing comparisons between base and PR versions
+- CPU flamegraphs showing where time is spent
+- Time series plots showing:
+ - Block height vs time
+ - Cache size vs block height
+ - Cache size vs time
+ - Transaction count vs block height
+ - Coins cache size vs time
+ - LevelDB metrics
+ - Memory pool metrics
+
+## Local Development (WIP)
+
+To run benchmarks locally (WIP, and Linux-only due to [shell.nix](../shell.nix) limitations):
+
+1. Make sure you have [Nix package manager](https://nixos.org/download/) installed
+
+2. Setup the Nix development environment:
+```bash
+nix-shell
+```
+
+3. Run a local benchmark:
+```bash
+just test-instrumented <base> <head> <datadir>
+```
+
+This will:
+- Create a temporary directory for testing
+- Build both base and PR versions
+- Download the required UTXO snapshot if needed
+- Run the benchmark
+- Generate performance visualizations
+
+## Technical Details
+
+The benchmarking system uses:
+- [Hyperfine](https://github.com/sharkdp/hyperfine) for benchmark timing
+- [Flamegraph](https://github.com/willcl-ark/flamegraph) for CPU profiling
+- [matplotlib](https://matplotlib.org/) for metric visualization
+- [GitHub Actions](https://github.com/features/actions) for CI automation
+
+The system copies over a pruned datadir to speed up IBD to a more interesting height (840k).
+
+### Runner & seed
+
+The CI runner is self-hosted on a Hetzner AX52 running at the bitcoin-dev-tools organisation level.
+It is running NixOS using configuration found in this repo: [nix-github-runner](https://github.com/bitcoin-dev-tools/nix-github-runner) for easier deployment and reproducibility.
+
+The runner host has 16 cores, with one used for system, one for `flamegraph` (i.e. `perf record`) and 14 dedicated to the Bitcoin Core node under test.
+
+The benchmarking peer on the runner is served blocks over the (real) "internet" (it may be LAN as it's within a single Hetzner region) via a single peer to exercise full IBD codepaths. This naturally may introduce some variance, but it was deemed preferable to running another bitcoin core on the same machine.
+
+This seed peer is another Hetzner VPS in the same region, and its configuration can be found here: [nix-seed-node](https://github.com/bitcoin-dev-tools/nix-seed-node)
+
+## Contributing
+
+### Benchmark an existing bitcoin/bitcoin PR
+
+This requires `just` be installed. If you don't have `just` installed you can run the commands in the [justfile](../justfile) manually.
+
+1. Fork this repository (or bitcoin/bitcoin and add this as a remote)
+2. Create a new branch from benchcoin/master
+3. Run: `just pick-pr <pr_number>` to cherry-pick commits from the PR
+4. Push the branch
+5. Open a pull request **against this repo. NOT bitcoin/bitcoin**
+
+### Benchmark standalone/new changes
+
+1. Fork this repository (or bitcoin/bitcoin and add this as a remote)
+2. Make your changes to Bitcoin Core
+3. Open a pull request **against this repo. NOT bitcoin/bitcoin**
+4. Wait for benchmark results to be posted on your PR here
+
+## License
+
+This project is licensed under the same terms as Bitcoin Core - see the [COPYING](../COPYING) file for details.
diff --git a/doc/flamegraph.svg b/doc/flamegraph.svg
new file mode 100644
index 000000000000..77f05068edd1
--- /dev/null
+++ b/doc/flamegraph.svg
@@ -0,0 +1,491 @@
+
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 000000000000..fc1308c520fa
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,27 @@
+{
+ "nodes": {
+ "nixpkgs": {
+ "locked": {
+ "lastModified": 1764983851,
+ "narHash": "sha256-y7RPKl/jJ/KAP/VKLMghMgXTlvNIJMHKskl8/Uuar7o=",
+ "owner": "NixOS",
+ "repo": "nixpkgs",
+ "rev": "d9bc5c7dceb30d8d6fafa10aeb6aa8a48c218454",
+ "type": "github"
+ },
+ "original": {
+ "owner": "NixOS",
+ "ref": "nixos-25.11",
+ "repo": "nixpkgs",
+ "type": "github"
+ }
+ },
+ "root": {
+ "inputs": {
+ "nixpkgs": "nixpkgs"
+ }
+ }
+ },
+ "root": "root",
+ "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 000000000000..b42180629d1a
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,170 @@
+{
+ description = "bitcoind for benchmarking";
+
+ inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.11";
+
+ outputs =
+ { self, nixpkgs }:
+ let
+ systems = [
+ "x86_64-linux"
+ "aarch64-darwin"
+ ];
+
+ forAllSystems = f: nixpkgs.lib.genAttrs systems (system: f system);
+
+ pkgsFor = system: import nixpkgs { inherit system; };
+
+ mkBitcoinCore =
+ system:
+ let
+ pkgs = pkgsFor system;
+ inherit (pkgs) lib;
+
+ pname = "bitcoin-core";
+ version = self.shortRev or "dirty";
+
+ CFlags = toString [
+ "-O2"
+ "-g"
+ ];
+ CXXFlags = "${CFlags} -fno-omit-frame-pointer";
+
+ nativeBuildInputs = [
+ pkgs.cmake
+ pkgs.ninja
+ pkgs.pkg-config
+ pkgs.python3
+ ];
+
+ buildInputs = [
+ pkgs.boost188.dev
+ pkgs.libevent.dev
+ ];
+
+ cmakeFlags = [
+ "-DBUILD_BENCH=OFF"
+ "-DBUILD_BITCOIN_BIN=OFF"
+ "-DBUILD_CLI=OFF"
+ "-DBUILD_DAEMON=ON"
+ "-DBUILD_FUZZ_BINARY=OFF"
+ "-DBUILD_GUI_TESTS=OFF"
+ "-DBUILD_TESTS=OFF"
+ "-DBUILD_TX=OFF"
+ "-DBUILD_UTIL=OFF"
+ "-DBUILD_WALLET_TOOL=OFF"
+ "-DCMAKE_BUILD_TYPE=RelWithDebInfo"
+ "-DCMAKE_SKIP_RPATH=ON"
+ "-DENABLE_EXTERNAL_SIGNER=OFF"
+ "-DENABLE_IPC=OFF"
+ "-DENABLE_WALLET=OFF"
+ "-DREDUCE_EXPORTS=ON"
+ "-DWITH_ZMQ=OFF"
+ ];
+ in
+ pkgs.stdenv.mkDerivation {
+ inherit
+ pname
+ version
+ nativeBuildInputs
+ buildInputs
+ cmakeFlags
+ ;
+
+ preConfigure = ''
+ cmakeFlagsArray+=(
+ "-DAPPEND_CFLAGS=${CFlags}"
+ "-DAPPEND_CXXFLAGS=${CXXFlags}"
+ "-DAPPEND_LDFLAGS=-Wl,--as-needed -Wl,-O2"
+ )
+ '';
+
+ src = builtins.path {
+ path = ./.;
+ name = "source";
+ };
+
+ env = {
+ CMAKE_GENERATOR = "Ninja";
+ LC_ALL = "C";
+ LIBRARY_PATH = "";
+ CPATH = "";
+ C_INCLUDE_PATH = "";
+ CPLUS_INCLUDE_PATH = "";
+ OBJC_INCLUDE_PATH = "";
+ OBJCPLUS_INCLUDE_PATH = "";
+ };
+
+ dontStrip = true;
+
+ meta = {
+ description = "bitcoind for benchmarking";
+ homepage = "https://bitcoincore.org/";
+ license = lib.licenses.mit;
+ };
+ };
+ in
+ {
+ packages = forAllSystems (system: {
+ default = mkBitcoinCore system;
+ });
+
+ formatter = forAllSystems (system: (pkgsFor system).nixfmt-tree);
+
+ devShells = forAllSystems (
+ system:
+ let
+ pkgs = pkgsFor system;
+ inherit (pkgs) stdenv;
+
+ # Override the default cargo-flamegraph with a custom fork including bitcoin highlighting
+ cargo-flamegraph = pkgs.rustPlatform.buildRustPackage rec {
+ pname = "flamegraph";
+ version = "bitcoin-core";
+
+ src = pkgs.fetchFromGitHub {
+ owner = "willcl-ark";
+ repo = "flamegraph";
+ rev = "bitcoin-core";
+ sha256 = "sha256-tQbr3MYfAiOxeT12V9au5KQK5X5JeGuV6p8GR/Sgen4=";
+ };
+
+ doCheck = false;
+ cargoHash = "sha256-QWPqTyTFSZNJNayNqLmsQSu0rX26XBKfdLROZ9tRjrg=";
+
+ nativeBuildInputs = pkgs.lib.optionals stdenv.hostPlatform.isLinux [ pkgs.makeWrapper ];
+ buildInputs = pkgs.lib.optionals stdenv.hostPlatform.isDarwin [
+ pkgs.darwin.apple_sdk.frameworks.Security
+ ];
+
+ postFixup = pkgs.lib.optionalString stdenv.hostPlatform.isLinux ''
+ wrapProgram $out/bin/cargo-flamegraph \
+ --set-default PERF ${pkgs.perf}/bin/perf
+ wrapProgram $out/bin/flamegraph \
+ --set-default PERF ${pkgs.perf}/bin/perf
+ '';
+ };
+ in
+ {
+ default = pkgs.mkShell {
+ buildInputs = [
+ # Benchmarking
+ cargo-flamegraph
+ pkgs.flamegraph
+ pkgs.hyperfine
+ pkgs.jq
+ pkgs.just
+ pkgs.perf
+ pkgs.perf-tools
+ pkgs.python312
+ pkgs.python312Packages.matplotlib
+ pkgs.util-linux
+
+ # Binary patching
+ pkgs.patchelf
+ ];
+ };
+ }
+ );
+ };
+}
diff --git a/justfile b/justfile
new file mode 100644
index 000000000000..d128c7e8b195
--- /dev/null
+++ b/justfile
@@ -0,0 +1,115 @@
+set shell := ["bash", "-uc"]
+
+default:
+ just --list
+
+# ============================================================================
+# Local benchmarking commands
+# ============================================================================
+
+# Test instrumented run using signet (includes report generation)
+[group('local')]
+test-instrumented base head datadir:
+ nix develop --command python3 bench.py build --skip-existing {{ base }}:base {{ head }}:head
+ nix develop --command python3 bench.py --profile quick run \
+ --chain signet \
+ --instrumented \
+ --datadir {{ datadir }} \
+ base:./binaries/base/bitcoind \
+ head:./binaries/head/bitcoind
+ nix develop --command python3 bench.py report bench-output/ bench-output/
+
+# Test uninstrumented run using signet
+[group('local')]
+test-uninstrumented base head datadir:
+ nix develop --command python3 bench.py build --skip-existing {{ base }}:base {{ head }}:head
+ nix develop --command python3 bench.py --profile quick run \
+ --chain signet \
+ --datadir {{ datadir }} \
+ base:./binaries/base/bitcoind \
+ head:./binaries/head/bitcoind
+
+# Full benchmark with instrumentation (flamegraphs + plots)
+[group('local')]
+instrumented base head datadir:
+ python3 bench.py build {{ base }}:base {{ head }}:head
+ python3 bench.py --profile quick run \
+ --instrumented \
+ --datadir {{ datadir }} \
+ base:./binaries/base/bitcoind \
+ head:./binaries/head/bitcoind
+
+# Just build binaries (useful for incremental testing)
+[group('local')]
+build *commits:
+ python3 bench.py build {{ commits }}
+
+# Run benchmark with pre-built binaries
+[group('local')]
+run datadir *binaries:
+ python3 bench.py run --datadir {{ datadir }} {{ binaries }}
+
+# Generate plots from a debug.log file
+[group('local')]
+analyze commit logfile output_dir="./plots":
+ python3 bench.py analyze {{ commit }} {{ logfile }} --output-dir {{ output_dir }}
+
+# Compare benchmark results
+[group('local')]
+compare *results_files:
+ python3 bench.py compare {{ results_files }}
+
+# Generate HTML report from benchmark results
+[group('local')]
+report input_dir output_dir:
+ python3 bench.py report {{ input_dir }} {{ output_dir }}
+
+# ============================================================================
+# CI commands (called by GitHub Actions)
+# ============================================================================
+
+# Build binaries for CI
+[group('ci')]
+ci-build base_commit head_commit binaries_dir:
+ python3 bench.py build -o {{ binaries_dir }} {{ base_commit }}:base {{ head_commit }}:head
+
+# Run uninstrumented benchmarks for CI
+[group('ci')]
+ci-run datadir tmp_datadir output_dir dbcache binaries_dir:
+ python3 bench.py --profile ci run \
+ --datadir {{ datadir }} \
+ --tmp-datadir {{ tmp_datadir }} \
+ --output-dir {{ output_dir }} \
+ --dbcache {{ dbcache }} \
+ base:{{ binaries_dir }}/base/bitcoind \
+ head:{{ binaries_dir }}/head/bitcoind
+
+# Run instrumented benchmarks for CI
+[group('ci')]
+ci-run-instrumented datadir tmp_datadir output_dir dbcache binaries_dir:
+ python3 bench.py --profile ci run \
+ --instrumented \
+ --datadir {{ datadir }} \
+ --tmp-datadir {{ tmp_datadir }} \
+ --output-dir {{ output_dir }} \
+ --dbcache {{ dbcache }} \
+ base:{{ binaries_dir }}/base/bitcoind \
+ head:{{ binaries_dir }}/head/bitcoind
+
+# ============================================================================
+# Git helpers
+# ============================================================================
+
+# Cherry-pick commits from a Bitcoin Core PR onto this branch
+[group('git')]
+pick-pr pr_number:
+ #!/usr/bin/env bash
+ set -euxo pipefail
+
+ if ! git remote get-url upstream 2>/dev/null | grep -q "bitcoin/bitcoin"; then
+ echo "Error: 'upstream' remote not found or doesn't point to bitcoin/bitcoin"
+ echo "Please add it with: git remote add upstream https://github.com/bitcoin/bitcoin.git"
+ exit 1
+ fi
+
+ git fetch upstream pull/{{ pr_number }}/head:bench-{{ pr_number }} && git cherry-pick $(git rev-list --reverse bench-{{ pr_number }} --not upstream/master)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000000..26605fc84930
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+[project]
+name = "bitcoin-core-deps"
+version = "0.1.0"
+dependencies = [
+ "codespell==2.2.6",
+ "lief==0.13.2",
+ "mypy==1.4.1",
+ "pyzmq==25.1.0",
+ # Removing in favour of packaged nixpkgs bin which is not dynamically linked
+ # "ruff==0.5.5",
+ "vulture==2.6",
+ "pyperf==2.8.0",
+ "matplotlib==3.8.0",
+ "numpy==1.26.0"
+]
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000000..c9b220b6fe46
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,28 @@
+# This file was autogenerated by uv via the following command:
+# uv pip compile pyproject.toml -o requirements.txt
+codespell==2.2.6
+ # via bitcoin-core-deps (pyproject.toml)
+lief==0.13.2
+ # via bitcoin-core-deps (pyproject.toml)
+matplotlib==3.8.0
+ # via bitcoin-core-deps (pyproject.toml)
+mypy==1.4.1
+ # via bitcoin-core-deps (pyproject.toml)
+mypy-extensions==1.0.0
+ # via mypy
+numpy==1.26.0
+ # via bitcoin-core-deps (pyproject.toml)
+psutil==6.1.0
+ # via pyperf
+pyperf==2.8.0
+ # via bitcoin-core-deps (pyproject.toml)
+pyzmq==25.1.0
+ # via bitcoin-core-deps (pyproject.toml)
+toml==0.10.2
+ # via vulture
+tomli==2.0.2
+ # via mypy
+typing-extensions==4.12.2
+ # via mypy
+vulture==2.6
+ # via bitcoin-core-deps (pyproject.toml)
diff --git a/src/bench/CMakeLists.txt b/src/bench/CMakeLists.txt
index e0e03b1df7cc..9d03f075a750 100644
--- a/src/bench/CMakeLists.txt
+++ b/src/bench/CMakeLists.txt
@@ -29,6 +29,7 @@ add_executable(bench_bitcoin
gcs_filter.cpp
hashpadding.cpp
index_blockfilter.cpp
+ inputfetcher.cpp
load_external.cpp
lockedpool.cpp
logging.cpp
diff --git a/src/bench/inputfetcher.cpp b/src/bench/inputfetcher.cpp
new file mode 100644
index 000000000000..66be4a6ff593
--- /dev/null
+++ b/src/bench/inputfetcher.cpp
@@ -0,0 +1,57 @@
+// Copyright (c) 2024-present The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static constexpr auto QUEUE_BATCH_SIZE{128};
+static constexpr auto DELAY{2ms};
+
+//! Simulates a DB by adding a delay when calling GetCoin
+class DelayedCoinsView : public CCoinsView
+{
+private:
+ std::chrono::milliseconds m_delay;
+
+public:
+ DelayedCoinsView(std::chrono::milliseconds delay) : m_delay(delay) {}
+
+ std::optional GetCoin(const COutPoint& outpoint) const override
+ {
+ UninterruptibleSleep(m_delay);
+ return Coin{};
+ }
+
+ bool BatchWrite(CoinsViewCacheCursor& cursor, const uint256 &hashBlock) override { return true; }
+};
+
+static void InputFetcherBenchmark(benchmark::Bench& bench)
+{
+ DataStream stream{benchmark::data::block413567};
+ CBlock block;
+ stream >> TX_WITH_WITNESS(block);
+
+ DelayedCoinsView db(DELAY);
+ CCoinsViewCache cache(&db);
+
+ // FetchInputs() also fetches on the calling thread, so spawn one worker fewer
+ // than the core count to avoid oversubscription and reduce result variance.
+ const auto worker_threads_num{GetNumCores() - 1};
+ InputFetcher fetcher{QUEUE_BATCH_SIZE, worker_threads_num};
+
+ bench.run([&] {
+ const auto ok{cache.Flush()};
+ assert(ok);
+ fetcher.FetchInputs(cache, db, block);
+ });
+}
+
+BENCHMARK(InputFetcherBenchmark, benchmark::PriorityLevel::HIGH);
diff --git a/src/coins.cpp b/src/coins.cpp
index 554a3ebe962b..82619877370d 100644
--- a/src/coins.cpp
+++ b/src/coins.cpp
@@ -110,12 +110,14 @@ void CCoinsViewCache::AddCoin(const COutPoint &outpoint, Coin&& coin, bool possi
(bool)it->second.coin.IsCoinBase());
}
-void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin) {
+void CCoinsViewCache::EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin, bool set_dirty) {
const auto mem_usage{coin.DynamicMemoryUsage()};
auto [it, inserted] = cacheCoins.try_emplace(std::move(outpoint), std::move(coin));
if (inserted) {
- CCoinsCacheEntry::SetDirty(*it, m_sentinel);
cachedCoinsUsage += mem_usage;
+ if (set_dirty) {
+ CCoinsCacheEntry::SetDirty(*it, m_sentinel);
+ }
}
}
diff --git a/src/coins.h b/src/coins.h
index 2fcc764a3fdf..6ceeac3ce2dc 100644
--- a/src/coins.h
+++ b/src/coins.h
@@ -421,12 +421,13 @@ class CCoinsViewCache : public CCoinsViewBacked
/**
* Emplace a coin into cacheCoins without performing any checks, marking
- * the emplaced coin as dirty.
+ * the emplaced coin as dirty unless `set_dirty` is `false`.
*
- * NOT FOR GENERAL USE. Used only when loading coins from a UTXO snapshot.
+ * NOT FOR GENERAL USE. Used when loading coins from a UTXO snapshot, and
+ * in the InputFetcher.
* @sa ChainstateManager::PopulateAndValidateSnapshot()
*/
- void EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin);
+ void EmplaceCoinInternalDANGER(COutPoint&& outpoint, Coin&& coin, bool set_dirty = true);
/**
* Spend a coin. Pass moveto in order to get the deleted data.
diff --git a/src/inputfetcher.h b/src/inputfetcher.h
new file mode 100644
index 000000000000..5b89fd0ebe87
--- /dev/null
+++ b/src/inputfetcher.h
@@ -0,0 +1,246 @@
+// Copyright (c) 2024-present The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#ifndef BITCOIN_INPUTFETCHER_H
+#define BITCOIN_INPUTFETCHER_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+/**
+ * Input fetcher for fetching inputs from the CoinsDB and inserting
+ * into the CoinsTip.
+ *
+ * The main thread loops through the block and writes all input prevouts to a
+ * global vector. It then wakes all workers and starts working as well. Each
+ * thread assigns itself a range of outpoints from the shared vector, and
+ * fetches the coins from disk. The outpoint and coin pairs are written to a
+ * thread local vector of pairs. Once all outpoints are fetched, the main thread
+ * loops through all thread local vectors and writes the pairs to the cache.
+ */
+class InputFetcher
+{
+private:
+ //! Mutex to protect the inner state
+ Mutex m_mutex{};
+ //! Worker threads block on this when out of work
+ std::condition_variable m_worker_cv{};
+ //! Main thread blocks on this when out of work
+ std::condition_variable m_main_cv{};
+
+ /**
+ * The outpoints to be fetched from disk.
+ * This is written to on the main thread, then read from all worker
+ * threads only after the main thread is done writing. Hence, it doesn't
+ * need to be guarded by a lock.
+ */
+ std::vector m_outpoints{};
+ /**
+ * The index of the last outpoint that is being fetched. Workers assign
+ * themselves a range of outpoints to fetch from m_outpoints. They will use
+ * this index as the end of their range, and then set this index to the
+ * beginning of the range they take for the next worker. Once it gets to
+ * zero, all outpoints have been assigned and the next worker will wait.
+ */
+ size_t m_last_outpoint_index GUARDED_BY(m_mutex){0};
+
+ //! The set of txids of the transactions in the current block being fetched.
+ std::unordered_set m_txids{};
+ //! The vector of thread local vectors of pairs to be written to the cache.
+ std::vector>> m_pairs{};
+
+ /**
+ * Number of outpoint fetches that haven't completed yet.
+ * This includes outpoints that have already been assigned, but are still in
+ * the worker's own batches.
+ */
+ int32_t m_in_flight_outpoints_count GUARDED_BY(m_mutex){0};
+ //! The number of worker threads that are waiting on m_worker_cv
+ int32_t m_idle_worker_count GUARDED_BY(m_mutex){0};
+ //! The maximum number of outpoints to be assigned in one batch
+ const int32_t m_batch_size;
+ //! DB coins view to fetch from.
+ const CCoinsView* m_db{nullptr};
+ //! The cache to check if we already have this input.
+ const CCoinsViewCache* m_cache{nullptr};
+
+ std::vector m_worker_threads;
+ bool m_request_stop GUARDED_BY(m_mutex){false};
+
+ //! Internal function that does the fetching from disk.
+ void Loop(int32_t index, bool is_main_thread = false) noexcept EXCLUSIVE_LOCKS_REQUIRED(!m_mutex)
+ {
+ auto local_batch_size{0};
+ auto end_index{0};
+ auto& cond{is_main_thread ? m_main_cv : m_worker_cv};
+ do {
+ {
+ WAIT_LOCK(m_mutex, lock);
+ // first do the clean-up of the previous loop run (allowing us to do
+ // it in the same critsect) local_batch_size will only be
+ // truthy after first run.
+ if (local_batch_size) {
+ m_in_flight_outpoints_count -= local_batch_size;
+ if (!is_main_thread && m_in_flight_outpoints_count == 0) {
+ m_main_cv.notify_one();
+ }
+ }
+
+ // logically, the do loop starts here
+ while (m_last_outpoint_index == 0) {
+ if ((is_main_thread && m_in_flight_outpoints_count == 0) || m_request_stop) {
+ return;
+ }
+ ++m_idle_worker_count;
+ cond.wait(lock);
+ --m_idle_worker_count;
+ }
+
+ // Assign a batch of outpoints to this thread
+ local_batch_size = std::max(1, std::min(m_batch_size,
+ static_cast(m_last_outpoint_index /
+ (m_worker_threads.size() + 1 + m_idle_worker_count))));
+ end_index = m_last_outpoint_index;
+ m_last_outpoint_index -= local_batch_size;
+ }
+
+ auto& local_pairs{m_pairs[index]};
+ local_pairs.reserve(local_pairs.size() + local_batch_size);
+ try {
+ for (auto i{end_index - local_batch_size}; i < end_index; ++i) {
+ const auto& outpoint{m_outpoints[i]};
+ // If an input spends an outpoint from earlier in the
+ // block, it won't be in the cache yet but it also won't be
+ // in the db either.
+ if (m_txids.contains(outpoint.hash)) {
+ continue;
+ }
+ if (m_cache->HaveCoinInCache(outpoint)) {
+ continue;
+ }
+ if (auto coin{m_db->GetCoin(outpoint)}; coin) {
+ local_pairs.emplace_back(outpoint, std::move(*coin));
+ } else {
+ // Missing an input. This block will fail validation.
+ // Skip remaining outpoints and continue so main thread
+ // can proceed.
+ LOCK(m_mutex);
+ m_in_flight_outpoints_count -= m_last_outpoint_index;
+ m_last_outpoint_index = 0;
+ break;
+ }
+ }
+ } catch (const std::runtime_error&) {
+ // Database error. This will be handled later in validation.
+ // Skip remaining outpoints and continue so main thread
+ // can proceed.
+ LOCK(m_mutex);
+ m_in_flight_outpoints_count -= m_last_outpoint_index;
+ m_last_outpoint_index = 0;
+ }
+ } while (true);
+ }
+
+public:
+
+ //! Create a new input fetcher
+ explicit InputFetcher(int32_t batch_size, int32_t worker_thread_count) noexcept
+ : m_batch_size(batch_size)
+ {
+ if (worker_thread_count < 1) {
+ // Don't do anything if there are no worker threads.
+ return;
+ }
+ m_pairs.reserve(worker_thread_count + 1);
+ for (auto n{0}; n < worker_thread_count + 1; ++n) {
+ m_pairs.emplace_back();
+ }
+ m_worker_threads.reserve(worker_thread_count);
+ for (auto n{0}; n < worker_thread_count; ++n) {
+ m_worker_threads.emplace_back([this, n]() {
+ util::ThreadRename(strprintf("inputfetch.%i", n));
+ Loop(n);
+ });
+ }
+ }
+
+ // Since this class manages its own resources, which is a thread
+ // pool `m_worker_threads`, copy and move operations are not appropriate.
+ InputFetcher(const InputFetcher&) = delete;
+ InputFetcher& operator=(const InputFetcher&) = delete;
+ InputFetcher(InputFetcher&&) = delete;
+ InputFetcher& operator=(InputFetcher&&) = delete;
+
+ //! Fetch all block inputs from db, and insert into cache.
+ void FetchInputs(CCoinsViewCache& cache,
+ const CCoinsView& db,
+ const CBlock& block) noexcept
+ EXCLUSIVE_LOCKS_REQUIRED(!m_mutex)
+ {
+ if (m_worker_threads.empty() || block.vtx.size() <= 1) {
+ return;
+ }
+
+ // Set the db and cache to use for this block.
+ m_db = &db;
+ m_cache = &cache;
+
+ // Loop through the inputs of the block and add them to the queue
+ m_txids.reserve(block.vtx.size() - 1);
+ for (const auto& tx : block.vtx) {
+ if (tx->IsCoinBase()) {
+ continue;
+ }
+ m_outpoints.reserve(m_outpoints.size() + tx->vin.size());
+ for (const auto& in : tx->vin) {
+ m_outpoints.emplace_back(in.prevout);
+ }
+ m_txids.emplace(tx->GetHash());
+ }
+ {
+ LOCK(m_mutex);
+ m_in_flight_outpoints_count = m_outpoints.size();
+ m_last_outpoint_index = m_outpoints.size();
+ }
+ m_worker_cv.notify_all();
+
+ // Have the main thread work too while we wait for other threads
+ Loop(m_worker_threads.size(), /*is_main_thread=*/true);
+
+ // At this point all threads are done writing to m_pairs, so we can
+ // safely read from it and insert the fetched coins into the cache.
+ for (auto& local_pairs : m_pairs) {
+ for (auto&& [outpoint, coin] : local_pairs) {
+ cache.EmplaceCoinInternalDANGER(std::move(outpoint),
+ std::move(coin),
+ /*set_dirty=*/false);
+ }
+ local_pairs.clear();
+ }
+ m_txids.clear();
+ m_outpoints.clear();
+ }
+
+ ~InputFetcher()
+ {
+ WITH_LOCK(m_mutex, m_request_stop = true);
+ m_worker_cv.notify_all();
+ for (std::thread& t : m_worker_threads) {
+ t.join();
+ }
+ }
+};
+
+#endif // BITCOIN_INPUTFETCHER_H
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 9528004e988e..9927e8f0b681 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -50,6 +50,7 @@ add_executable(test_bitcoin
headers_sync_chainwork_tests.cpp
httpserver_tests.cpp
i2p_tests.cpp
+ inputfetcher_tests.cpp
interfaces_tests.cpp
key_io_tests.cpp
key_tests.cpp
diff --git a/src/test/fuzz/CMakeLists.txt b/src/test/fuzz/CMakeLists.txt
index 607723b978ae..5abc124f6310 100644
--- a/src/test/fuzz/CMakeLists.txt
+++ b/src/test/fuzz/CMakeLists.txt
@@ -54,6 +54,7 @@ add_executable(fuzz
hex.cpp
http_request.cpp
i2p.cpp
+ inputfetcher.cpp
integer.cpp
key.cpp
key_io.cpp
diff --git a/src/test/fuzz/inputfetcher.cpp b/src/test/fuzz/inputfetcher.cpp
new file mode 100644
index 000000000000..ca3c2f7509d1
--- /dev/null
+++ b/src/test/fuzz/inputfetcher.cpp
@@ -0,0 +1,153 @@
+// Copyright (c) 2024-present The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include