From 90f4727b2e2797745b232098046a7885b2e38c1e Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 15:07:23 +0800 Subject: [PATCH 01/15] feat(reference-index): add morph-reference-index crate with MDBX storage layer - 4-table schema: ReferenceIndex, BlockReferenceIndex, IndexedBlocks, IndexMeta - chain identity validation on DB open (chain_id, genesis_hash, schema_version) - BackfillState (NotStarted/InProgress/Complete) + is_ready AtomicBool - writer: write_block / delete_block (three-table atomic) - reader: prefix cursor query with is_ready + lag threshold guards - backfill: jade_first_block binary search + batched backfill with crash recovery - reconcile: canonical hash check (offline reorg detection) + suffix gap fill - metrics helpers for lag, progress, state, readiness - 17 unit tests all passing --- Cargo.lock | 30 ++ Cargo.toml | 2 + crates/reference-index/Cargo.toml | 37 +++ crates/reference-index/src/backfill.rs | 279 +++++++++++++++++ crates/reference-index/src/db.rs | 323 +++++++++++++++++++ crates/reference-index/src/lib.rs | 39 +++ crates/reference-index/src/metrics.rs | 19 ++ crates/reference-index/src/reader.rs | 148 +++++++++ crates/reference-index/src/reconcile.rs | 116 +++++++ crates/reference-index/src/tables.rs | 399 ++++++++++++++++++++++++ crates/reference-index/src/types.rs | 101 ++++++ crates/reference-index/src/writer.rs | 285 +++++++++++++++++ 12 files changed, 1778 insertions(+) create mode 100644 crates/reference-index/Cargo.toml create mode 100644 crates/reference-index/src/backfill.rs create mode 100644 crates/reference-index/src/db.rs create mode 100644 crates/reference-index/src/lib.rs create mode 100644 crates/reference-index/src/metrics.rs create mode 100644 crates/reference-index/src/reader.rs create mode 100644 crates/reference-index/src/reconcile.rs create mode 100644 crates/reference-index/src/tables.rs create mode 100644 crates/reference-index/src/types.rs create mode 100644 crates/reference-index/src/writer.rs diff 
--git a/Cargo.lock b/Cargo.lock index 7e52838..acdcb32 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4775,10 +4775,12 @@ dependencies = [ "async-trait", "auto_impl", "jsonrpsee", + "metrics", "morph-chainspec", "morph-payload-types", "morph-primitives", "parking_lot", + "reth-metrics", "reth-node-api", "reth-payload-builder", "reth-payload-primitives", @@ -4880,6 +4882,7 @@ dependencies = [ "alloy-eips", "alloy-primitives", "alloy-rlp", + "metrics", "morph-chainspec", "morph-evm", "morph-payload-types", @@ -4888,6 +4891,7 @@ dependencies = [ "reth-chainspec", "reth-evm", "reth-execution-types", + "reth-metrics", "reth-payload-builder", "reth-payload-primitives", "reth-payload-util", @@ -4941,6 +4945,32 @@ dependencies = [ "serde_json", ] +[[package]] +name = "morph-reference-index" +version = "0.2.2" +dependencies = [ + "alloy-consensus", + "alloy-primitives", + "alloy-rlp", + "eyre", + "metrics", + "morph-chainspec", + "morph-primitives", + "reth-chainspec", + "reth-codecs", + "reth-db", + "reth-db-api", + "reth-errors", + "reth-primitives-traits", + "reth-provider", + "reth-storage-api", + "serde", + "tempfile", + "thiserror 2.0.18", + "tokio", + "tracing", +] + [[package]] name = "morph-reth" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 901cfcb..55f86e5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ members = [ "crates/payload/builder", "crates/payload/types", "crates/primitives", + "crates/reference-index", "crates/revm", "crates/txpool", ] @@ -50,6 +51,7 @@ morph-node = { path = "crates/node"} morph-payload-builder = { path = "crates/payload/builder", default-features = false } morph-payload-types = { path = "crates/payload/types", default-features = false } morph-primitives = { path = "crates/primitives", default-features = false } +morph-reference-index = { path = "crates/reference-index", default-features = false } morph-rpc = { path = "crates/rpc" } morph-revm = { path = "crates/revm", default-features = false } morph-txpool = { 
path = "crates/txpool", default-features = false } diff --git a/crates/reference-index/Cargo.toml b/crates/reference-index/Cargo.toml new file mode 100644 index 0000000..54ea6d4 --- /dev/null +++ b/crates/reference-index/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "morph-reference-index" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +license.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +alloy-consensus.workspace = true +alloy-primitives.workspace = true +alloy-rlp.workspace = true +eyre.workspace = true +metrics.workspace = true +morph-chainspec.workspace = true +morph-primitives = { workspace = true, features = ["reth-codec"] } +reth-chainspec.workspace = true +reth-codecs.workspace = true +reth-db.workspace = true +reth-db-api.workspace = true +reth-errors.workspace = true +reth-provider.workspace = true +reth-primitives-traits.workspace = true +reth-storage-api.workspace = true +serde.workspace = true +thiserror.workspace = true +tokio.workspace = true +tracing.workspace = true + +[dev-dependencies] +tempfile.workspace = true + +[features] +default = [] diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs new file mode 100644 index 0000000..71db21e --- /dev/null +++ b/crates/reference-index/src/backfill.rs @@ -0,0 +1,279 @@ +//! Historical reference index backfill. +//! +//! Runs once at startup to index all MorphTx transactions from +//! `jade_first_block_number` up to the chain head at startup time. 
+ +use crate::{ + JADE_NOT_ACTIVE_SENTINEL, ReferenceIndexDb, + types::{BackfillState, ReferenceIndexError}, + writer::{ + set_backfill_state, set_jade_first_block_number, update_backfill_current, + update_indexed_from, update_indexed_to, write_block, + }, +}; +use alloy_consensus::BlockHeader; +use morph_chainspec::hardfork::{MorphHardfork, MorphHardforks}; +use morph_primitives::MorphHeader; +use reth_chainspec::ForkCondition; +use reth_db_api::transaction::DbTx; +use reth_provider::{BlockReader, HeaderProvider}; +use reth_storage_api::{BlockNumReader, TransactionVariant}; +use tracing::{debug, info}; + +/// Determine the first block number at which the Jade hardfork is active. +/// +/// Returns `JADE_NOT_ACTIVE_SENTINEL` (`u64::MAX`) when: +/// - The chain spec has no Jade timestamp condition, or +/// - The current canonical head has not yet reached Jade activation. +/// +/// The dual-condition check (`block(N).timestamp >= jade_ts` AND +/// `block(N-1).timestamp < jade_ts`) guarantees we return the **first** +/// Jade block, not just any block after activation. +pub fn resolve_jade_first_block( + provider: &P, + chain_spec: &CS, + head: u64, +) -> Result +where + P: HeaderProvider
, + CS: MorphHardforks, +{ + let jade_ts = match chain_spec.morph_fork_activation(MorphHardfork::Jade) { + ForkCondition::Timestamp(ts) => ts, + _ => return Ok(JADE_NOT_ACTIVE_SENTINEL), + }; + + let head_header = provider + .header_by_number(head)? + .ok_or_else(|| ReferenceIndexError::Other(eyre::eyre!("missing canonical head header")))?; + + if head_header.timestamp() < jade_ts { + return Ok(JADE_NOT_ACTIVE_SENTINEL); + } + + // Binary-search over [0, head] to find the lowest block whose timestamp + // is >= jade_ts. We still verify the prev block to guard against unusual + // timestamp monotonicity violations. + let mut lo = 0u64; + let mut hi = head; + while lo < hi { + let mid = lo + (hi - lo) / 2; + let mid_header = provider + .header_by_number(mid)? + .ok_or_else(|| ReferenceIndexError::Other(eyre::eyre!("missing header at {mid}")))?; + if mid_header.timestamp() < jade_ts { + lo = mid + 1; + } else { + hi = mid; + } + } + + // `lo` is now the first block whose timestamp >= jade_ts. + // Verify that the previous block (if any) has timestamp < jade_ts. + if lo > 0 { + let prev = provider + .header_by_number(lo - 1)? + .ok_or_else(|| { + ReferenceIndexError::Other(eyre::eyre!("missing header at {}", lo - 1)) + })?; + if prev.timestamp() >= jade_ts { + // Shouldn't happen on a well-formed chain, but fall back to sentinel. + return Ok(JADE_NOT_ACTIVE_SENTINEL); + } + } + + Ok(lo) +} + +/// Run (or resume) the historical backfill. +/// +/// This function is synchronous and blocks the calling task until backfill is +/// complete. Batches are committed atomically so a crash mid-run can be +/// safely resumed from the last persisted checkpoint. +pub fn run_backfill( + db: &ReferenceIndexDb, + provider: &P, + chain_spec: &CS, + head_at_startup: u64, + batch_size: u64, +) -> Result<(), ReferenceIndexError> +where + P: BlockReader + BlockNumReader + HeaderProvider
, + CS: MorphHardforks, +{ + let state = db.backfill_state()?; + + let start = match state { + BackfillState::Complete => return Ok(()), + BackfillState::InProgress => { + // Resume from last checkpoint + 1 (checkpoint is the last fully written block). + db.indexed_to()?.saturating_add(1) + } + BackfillState::NotStarted => { + let jade_first = resolve_jade_first_block(provider, chain_spec, head_at_startup)?; + + let tx = db.tx_mut()?; + set_jade_first_block_number(&tx, jade_first)?; + set_backfill_state(&tx, BackfillState::InProgress)?; + tx.commit()?; + + if jade_first == JADE_NOT_ACTIVE_SENTINEL || jade_first > head_at_startup { + // Nothing to backfill; mark complete immediately. + let tx = db.tx_mut()?; + update_indexed_from(&tx, head_at_startup)?; + update_indexed_to(&tx, head_at_startup)?; + set_backfill_state(&tx, BackfillState::Complete)?; + tx.commit()?; + return Ok(()); + } + + jade_first + } + }; + + if start > head_at_startup { + // Already up to date. + let tx = db.tx_mut()?; + set_backfill_state(&tx, BackfillState::Complete)?; + tx.commit()?; + return Ok(()); + } + + let jade_first = db + .jade_first_block_number()? + .unwrap_or(start); + + info!( + target: "morph::reference_index", + start, head_at_startup, + "starting reference index backfill" + ); + + let total = head_at_startup.saturating_sub(start).saturating_add(1) as f64; + let mut done = 0f64; + let mut current = start; + + while current <= head_at_startup { + let batch_end = current.saturating_add(batch_size - 1).min(head_at_startup); + let is_last_batch = batch_end == head_at_startup; + + let tx = db.tx_mut()?; + for number in current..=batch_end { + let block = provider + .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? 
+ .ok_or_else(|| { + ReferenceIndexError::Other(eyre::eyre!( + "missing block {number} during backfill" + )) + })?; + + write_block( + &tx, + block.number(), + block.hash(), + block.timestamp(), + &block.body().transactions, + )?; + } + + if is_last_batch { + // Atomic: data + Complete + indexed_from + indexed_to in one commit. + update_indexed_from(&tx, jade_first)?; + update_indexed_to(&tx, head_at_startup)?; + set_backfill_state(&tx, BackfillState::Complete)?; + } else { + update_backfill_current(&tx, batch_end)?; + update_indexed_to(&tx, batch_end)?; + set_backfill_state(&tx, BackfillState::InProgress)?; + } + tx.commit()?; + + done += (batch_end - current + 1) as f64; + crate::metrics::set_backfill_progress(done / total); + + debug!( + target: "morph::reference_index", + batch_start = current, + batch_end, + is_last_batch, + "backfill batch committed" + ); + + current = batch_end + 1; + + if !is_last_batch { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + } + + info!(target: "morph::reference_index", "reference index backfill complete"); + Ok(()) +} + +/// Re-validate and (if needed) reset `jade_first_block_number` when the DB +/// was opened with a sentinel value (`u64::MAX`) from a previous run where +/// Jade had not yet activated. +/// +/// Call this once at startup before `run_backfill`. +pub fn maybe_reset_jade_sentinel( + db: &ReferenceIndexDb, + provider: &P, + chain_spec: &CS, + head: u64, +) -> Result<(), ReferenceIndexError> +where + P: HeaderProvider
, + CS: MorphHardforks, +{ + if db.backfill_state()? != BackfillState::Complete { + return Ok(()); + } + match db.jade_first_block_number()? { + Some(n) if n == JADE_NOT_ACTIVE_SENTINEL => { + // Jade was not active when last resolved. Re-try now. + let new = resolve_jade_first_block(provider, chain_spec, head)?; + if new != JADE_NOT_ACTIVE_SENTINEL { + // Jade has since activated; reset backfill state so the + // next startup picks it up. + let tx = db.tx_mut()?; + set_jade_first_block_number(&tx, new)?; + set_backfill_state(&tx, BackfillState::NotStarted)?; + tx.commit()?; + info!( + target: "morph::reference_index", + jade_first_block = new, + "Jade has activated; resetting backfill to index from first Jade block" + ); + } + } + _ => {} + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn jade_not_active_sentinel_is_u64_max() { + assert_eq!(JADE_NOT_ACTIVE_SENTINEL, u64::MAX); + } + + #[test] + fn backfill_state_try_from_roundtrip() { + assert_eq!( + BackfillState::try_from(0u8).unwrap(), + BackfillState::NotStarted + ); + assert_eq!( + BackfillState::try_from(1u8).unwrap(), + BackfillState::InProgress + ); + assert_eq!( + BackfillState::try_from(2u8).unwrap(), + BackfillState::Complete + ); + assert!(BackfillState::try_from(3u8).is_err()); + } +} diff --git a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs new file mode 100644 index 0000000..36977e8 --- /dev/null +++ b/crates/reference-index/src/db.rs @@ -0,0 +1,323 @@ +//! Reference index database opening and metadata helpers. 
+ +use crate::{ + tables::{IndexedBlockKey, IndexedBlocks, MetaKey, MetaValue, ReferenceIndexTables}, + types::{BackfillState, ReferenceIndexError, SCHEMA_VERSION}, +}; +use alloy_primitives::B256; +use reth_db::{DatabaseEnv, mdbx::DatabaseArguments}; +use reth_db_api::{ + Database, + cursor::DbCursorRO, + transaction::{DbTx, DbTxMut}, +}; +use std::{ + path::Path, + sync::{ + Arc, + atomic::{AtomicBool, Ordering}, + }, +}; + +// ── meta key discriminants ──────────────────────────────────────────────────── + +/// Discriminant values for the `IndexMeta` table. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum IndexMetaKey { + IndexedFrom = 1, + IndexedTo = 2, + BackfillState = 3, + BackfillCurrent = 4, + ChainId = 5, + GenesisHash = 6, + SchemaVersion = 7, + JadeFirstBlockNumber = 8, + SnapshotBlockNumber = 9, + SnapshotBlockHash = 10, +} + +impl From for MetaKey { + fn from(k: IndexMetaKey) -> Self { + Self(k as u8) + } +} + +// ── codec helpers ───────────────────────────────────────────────────────────── + +pub fn encode_u64(value: u64) -> MetaValue { + MetaValue(value.to_be_bytes().to_vec()) +} + +pub fn decode_u64(value: MetaValue) -> Result { + let bytes: [u8; 8] = value.0.as_slice().try_into().map_err(|_| { + ReferenceIndexError::Other(eyre::eyre!("invalid u64 metadata length")) + })?; + Ok(u64::from_be_bytes(bytes)) +} + +pub fn encode_u32(value: u32) -> MetaValue { + MetaValue(value.to_be_bytes().to_vec()) +} + +pub fn decode_u32(value: MetaValue) -> Result { + let bytes: [u8; 4] = value.0.as_slice().try_into().map_err(|_| { + ReferenceIndexError::Other(eyre::eyre!("invalid u32 metadata length")) + })?; + Ok(u32::from_be_bytes(bytes)) +} + +pub fn encode_b256(value: B256) -> MetaValue { + MetaValue(value.as_slice().to_vec()) +} + +pub fn decode_b256(value: MetaValue) -> Result { + let bytes: [u8; 32] = value.0.as_slice().try_into().map_err(|_| { + ReferenceIndexError::Other(eyre::eyre!("invalid B256 metadata length")) + })?; + 
Ok(B256::new(bytes)) +} + +// ── database handle ─────────────────────────────────────────────────────────── + +/// Handle to the reference index MDBX database. +/// +/// Wraps the raw [`DatabaseEnv`] and exposes metadata read helpers and an +/// atomic readiness flag. All writes go through `tx_mut()`. +#[derive(Debug, Clone)] +pub struct ReferenceIndexDb { + db: Arc, + ready: Arc, +} + +impl ReferenceIndexDb { + /// Open (or create) the reference index at `path` and validate chain identity. + /// + /// On the first open (empty DB), writes `chain_id`, `genesis_hash`, and + /// `schema_version` into `IndexMeta`. On every subsequent open, re-reads + /// those values and returns an error if any mismatch is detected. + pub fn open( + path: impl AsRef, + chain_id: u64, + genesis_hash: B256, + ) -> Result { + let db = reth_db::mdbx::init_db_for::<_, ReferenceIndexTables>( + path, + DatabaseArguments::new(reth_db::models::ClientVersion::default()), + ) + .map_err(|e| ReferenceIndexError::Other(eyre::eyre!("failed to open reference index DB: {e}")))?; + + let this = Self { + db: Arc::new(db), + ready: Arc::new(AtomicBool::new(false)), + }; + + this.validate_or_init_chain_identity(chain_id, genesis_hash)?; + Ok(this) + } + + /// Check (or initialise) the persisted chain identity in `IndexMeta`. + fn validate_or_init_chain_identity( + &self, + chain_id: u64, + genesis_hash: B256, + ) -> Result<(), ReferenceIndexError> { + let tx = self.tx()?; + let stored_chain_id = tx + .get::(IndexMetaKey::ChainId.into())? + .map(decode_u64) + .transpose()?; + + // First-ever open: write identity and return. 
+ if stored_chain_id.is_none() { + drop(tx); + let tx = self.tx_mut()?; + tx.put::(IndexMetaKey::ChainId.into(), encode_u64(chain_id))?; + tx.put::( + IndexMetaKey::GenesisHash.into(), + encode_b256(genesis_hash), + )?; + tx.put::( + IndexMetaKey::SchemaVersion.into(), + encode_u32(SCHEMA_VERSION), + )?; + tx.commit()?; + return Ok(()); + } + + // Subsequent opens: validate every field. + if stored_chain_id != Some(chain_id) { + return Err(ReferenceIndexError::ChainIdentityMismatch("chain_id")); + } + + let stored_genesis = tx + .get::(IndexMetaKey::GenesisHash.into())? + .map(decode_b256) + .transpose()?; + if stored_genesis != Some(genesis_hash) { + return Err(ReferenceIndexError::ChainIdentityMismatch("genesis_hash")); + } + + let stored_schema = tx + .get::(IndexMetaKey::SchemaVersion.into())? + .map(decode_u32) + .transpose()?; + match stored_schema { + Some(v) if v != SCHEMA_VERSION => { + return Err(ReferenceIndexError::SchemaMismatch { + expected: SCHEMA_VERSION, + actual: v, + }); + } + _ => {} + } + + Ok(()) + } + + // ── readiness ───────────────────────────────────────────────────────────── + + pub fn is_ready(&self) -> bool { + self.ready.load(Ordering::Acquire) + } + + pub fn set_ready(&self, ready: bool) { + self.ready.store(ready, Ordering::Release); + crate::metrics::set_ready(ready); + } + + // ── transaction factory ─────────────────────────────────────────────────── + + pub fn tx(&self) -> Result<::TX, ReferenceIndexError> { + Ok(self.db.tx()?) + } + + pub fn tx_mut(&self) -> Result<::TXMut, ReferenceIndexError> { + Ok(self.db.tx_mut()?) + } + + // ── metadata reads ──────────────────────────────────────────────────────── + + pub fn backfill_state(&self) -> Result { + let tx = self.tx()?; + match tx.get::(IndexMetaKey::BackfillState.into())? 
{ + Some(v) => BackfillState::try_from(*v.0.first().unwrap_or(&0)), + None => Ok(BackfillState::NotStarted), + } + } + + pub fn indexed_to(&self) -> Result { + let tx = self.tx()?; + tx.get::(IndexMetaKey::IndexedTo.into())? + .map(decode_u64) + .transpose() + .map(|v| v.unwrap_or(0)) + } + + pub fn indexed_from(&self) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + tx.get::(IndexMetaKey::IndexedFrom.into())? + .map(decode_u64) + .transpose() + } + + pub fn jade_first_block_number(&self) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + tx.get::(IndexMetaKey::JadeFirstBlockNumber.into())? + .map(decode_u64) + .transpose() + } + + pub fn snapshot_block_number(&self) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + tx.get::(IndexMetaKey::SnapshotBlockNumber.into())? + .map(decode_u64) + .transpose() + } + + pub fn snapshot_block_hash(&self) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + tx.get::(IndexMetaKey::SnapshotBlockHash.into())? + .map(decode_b256) + .transpose() + } + + /// Returns the canonical block hash stored in `IndexedBlocks` for `block_number`. + pub fn indexed_block_hash( + &self, + block_number: u64, + ) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + Ok(tx + .get::(IndexedBlockKey { block_number })? + .map(|v| v.0)) + } + + /// Returns the highest block number recorded in `IndexedBlocks`. + pub fn highest_indexed_block(&self) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + let mut cursor = tx.cursor_read::()?; + Ok(cursor.last()?.map(|(k, _)| k.block_number)) + } + + /// Returns the highest block number recorded in `BlockReferenceIndex`. + /// Used for gap detection after startup. + pub fn highest_block_reference_index(&self) -> Result, ReferenceIndexError> { + let tx = self.tx()?; + let mut cursor = + tx.cursor_read::()?; + Ok(cursor + .last()? 
+ .map(|(k, _)| k.block_number)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use alloy_primitives::B256; + use tempfile::TempDir; + + fn open_temp_db() -> (TempDir, ReferenceIndexDb) { + let dir = TempDir::new().unwrap(); + let db = ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + (dir, db) + } + + #[test] + fn open_writes_chain_identity_on_first_open() { + let (_dir, db) = open_temp_db(); + assert_eq!(db.backfill_state().unwrap(), BackfillState::NotStarted); + assert_eq!(db.indexed_to().unwrap(), 0); + } + + #[test] + fn open_rejects_mismatched_chain_id() { + let dir = TempDir::new().unwrap(); + ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + // Re-open with wrong chain_id. + let err = ReferenceIndexDb::open(dir.path(), 9999, B256::ZERO).unwrap_err(); + assert!(matches!(err, ReferenceIndexError::ChainIdentityMismatch("chain_id"))); + } + + #[test] + fn open_rejects_mismatched_genesis_hash() { + let dir = TempDir::new().unwrap(); + ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + let err = + ReferenceIndexDb::open(dir.path(), 2818, B256::repeat_byte(0xff)).unwrap_err(); + assert!(matches!(err, ReferenceIndexError::ChainIdentityMismatch("genesis_hash"))); + } + + #[test] + fn encode_decode_u64_roundtrip() { + let v = 0xDEAD_BEEF_CAFE_1234u64; + assert_eq!(decode_u64(encode_u64(v)).unwrap(), v); + } + + #[test] + fn encode_decode_b256_roundtrip() { + let v = B256::repeat_byte(0xab); + assert_eq!(decode_b256(encode_b256(v)).unwrap(), v); + } +} diff --git a/crates/reference-index/src/lib.rs b/crates/reference-index/src/lib.rs new file mode 100644 index 0000000..caa4604 --- /dev/null +++ b/crates/reference-index/src/lib.rs @@ -0,0 +1,39 @@ +//! Persistent Morph transaction reference index. 
+ +#![cfg_attr(not(test), warn(unused_crate_dependencies))] + +use alloy_consensus as _; +use alloy_rlp as _; +use morph_chainspec as _; +use morph_primitives as _; +use reth_codecs as _; +use reth_primitives_traits as _; +use reth_provider as _; +use reth_storage_api as _; +use tokio as _; +use tracing as _; + +pub mod backfill; +pub mod db; +pub mod metrics; +pub mod reader; +pub mod reconcile; +pub mod tables; +pub mod types; +pub mod writer; + +pub use db::ReferenceIndexDb; +pub use reader::ReferenceIndexReader; +pub use types::{ + BackfillState, JADE_NOT_ACTIVE_SENTINEL, ReferenceIndexError, ReferenceQuery, + ReferenceTransactionResult, SCHEMA_VERSION, +}; + +/// Default number of canonical blocks the ExEx may lag before RPC returns an error. +pub const DEFAULT_LAG_THRESHOLD: u64 = 16; + +/// Default number of blocks checked during startup reconcile for offline reorgs. +pub const DEFAULT_MAX_REORG_DEPTH: u64 = 64; + +/// Default backfill batch size. +pub const DEFAULT_BACKFILL_BATCH_BLOCKS: u64 = 256; diff --git a/crates/reference-index/src/metrics.rs b/crates/reference-index/src/metrics.rs new file mode 100644 index 0000000..4a2dc90 --- /dev/null +++ b/crates/reference-index/src/metrics.rs @@ -0,0 +1,19 @@ +pub fn set_ready(ready: bool) { + metrics::gauge!("morph_reference_index_ready").set(if ready { 1.0 } else { 0.0 }); +} + +pub fn set_lag_blocks(lag: u64) { + metrics::gauge!("morph_reference_index_lag_blocks").set(lag as f64); +} + +pub fn set_backfill_progress(progress: f64) { + metrics::gauge!("morph_reference_index_backfill_progress").set(progress.clamp(0.0, 1.0)); +} + +pub fn set_backfill_state(state: u8) { + metrics::gauge!("morph_reference_index_backfill_state").set(state as f64); +} + +pub fn increment_entries(count: u64) { + metrics::counter!("morph_reference_index_entries_total").increment(count); +} diff --git a/crates/reference-index/src/reader.rs b/crates/reference-index/src/reader.rs new file mode 100644 index 0000000..6414a27 --- /dev/null 
+++ b/crates/reference-index/src/reader.rs @@ -0,0 +1,148 @@ +//! Reference index read path helpers. + +use crate::{ + db::ReferenceIndexDb, + tables::{ReferenceIndex, ReferenceIndexKey}, + types::{ReferenceIndexError, ReferenceQuery, ReferenceTransactionResult}, +}; +use alloy_primitives::{B256, U64}; +use reth_db_api::{cursor::DbCursorRO, transaction::DbTx}; + +/// Read-only facade for reference index queries. +/// +/// `canonical_tip` must be the current chain head block number so the reader +/// can detect excessive lag between the index and the live chain. +#[derive(Clone, Debug)] +pub struct ReferenceIndexReader { + db: ReferenceIndexDb, + lag_threshold: u64, +} + +impl ReferenceIndexReader { + pub const fn new(db: ReferenceIndexDb, lag_threshold: u64) -> Self { + Self { db, lag_threshold } + } + + pub fn db(&self) -> &ReferenceIndexDb { + &self.db + } + + /// Execute a paginated reference query. + /// + /// `canonical_tip` is the current best block number, used to compute lag. + pub fn query( + &self, + query: ReferenceQuery, + canonical_tip: u64, + ) -> Result, ReferenceIndexError> { + if !self.db.is_ready() { + return Err(ReferenceIndexError::Initializing); + } + + let indexed_to = self.db.indexed_to()?; + if canonical_tip.saturating_sub(indexed_to) > self.lag_threshold { + crate::metrics::set_lag_blocks(canonical_tip.saturating_sub(indexed_to)); + return Err(ReferenceIndexError::IndexBehind); + } + + let tx = self.db.tx()?; + let mut cursor = tx.cursor_read::()?; + + let seek_key = ReferenceIndexKey { + reference: query.reference, + block_number: 0, + transaction_index: 0, + transaction_hash: B256::ZERO, + }; + + let mut skipped = 0u64; + let mut results = Vec::new(); + let mut next = cursor.seek(seek_key)?; + + while let Some((key, value)) = next { + if key.reference != query.reference { + break; + } + if results.len() as u64 >= query.limit { + break; + } + if skipped < query.offset { + skipped += 1; + } else { + results.push(ReferenceTransactionResult { 
+ transaction_hash: key.transaction_hash, + block_number: U64::from(key.block_number), + block_timestamp: U64::from(value.0), + transaction_index: U64::from(key.transaction_index), + }); + } + next = cursor.next()?; + } + + crate::metrics::set_lag_blocks(canonical_tip.saturating_sub(indexed_to)); + Ok(results) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + ReferenceIndexDb, + writer::{update_indexed_to, write_block}, + }; + use alloy_primitives::B256; + use tempfile::TempDir; + + fn open_ready_db() -> (TempDir, ReferenceIndexDb) { + let dir = TempDir::new().unwrap(); + let db = ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + db.set_ready(true); + (dir, db) + } + + #[test] + fn query_returns_initializing_when_not_ready() { + let dir = TempDir::new().unwrap(); + let db = ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + // is_ready stays false + let reader = ReferenceIndexReader::new(db, 16); + let q = ReferenceQuery::new(B256::ZERO, None, None).unwrap(); + assert!(matches!( + reader.query(q, 100), + Err(ReferenceIndexError::Initializing) + )); + } + + #[test] + fn query_returns_index_behind_when_lag_exceeds_threshold() { + let (_dir, db) = open_ready_db(); + + let tx = db.tx_mut().unwrap(); + update_indexed_to(&tx, 10).unwrap(); + tx.commit().unwrap(); + + let reader = ReferenceIndexReader::new(db, 16); + let q = ReferenceQuery::new(B256::with_last_byte(1), None, None).unwrap(); + // canonical_tip=27 → lag=17 > threshold=16 + assert!(matches!( + reader.query(q, 27), + Err(ReferenceIndexError::IndexBehind) + )); + } + + #[test] + fn query_returns_empty_when_no_reference_txs() { + let (_dir, db) = open_ready_db(); + + let tx = db.tx_mut().unwrap(); + write_block(&tx, 1, B256::repeat_byte(0x01), 100, &[]).unwrap(); + update_indexed_to(&tx, 1).unwrap(); + tx.commit().unwrap(); + + let reader = ReferenceIndexReader::new(db, 16); + let q = ReferenceQuery::new(B256::with_last_byte(0x42), None, None).unwrap(); + let 
results = reader.query(q, 1).unwrap(); + assert!(results.is_empty()); + } +} diff --git a/crates/reference-index/src/reconcile.rs b/crates/reference-index/src/reconcile.rs new file mode 100644 index 0000000..25981b0 --- /dev/null +++ b/crates/reference-index/src/reconcile.rs @@ -0,0 +1,116 @@ +//! Startup canonical chain reconciliation. +//! +//! After backfill completes, reconcile checks the last `max_reorg_depth` +//! indexed blocks against the current canonical chain to detect any reorgs +//! that occurred while the node was offline, then fills any suffix gap between +//! `indexed_to` and `head_at_startup`. + +use crate::{ + ReferenceIndexDb, + types::ReferenceIndexError, + writer::{delete_block, update_indexed_to, write_block}, +}; +use alloy_consensus::BlockHeader; +use morph_primitives::MorphHeader; +use reth_db_api::transaction::DbTx; +use reth_provider::{BlockHashReader, BlockReader, HeaderProvider}; +use reth_storage_api::TransactionVariant; +use tracing::{debug, info}; + +/// Run the startup reconciliation pass. +/// +/// Steps: +/// A. Canonical hash check over the last `max_reorg_depth` indexed blocks. +/// On mismatch, find the lowest diverging height, delete forward entries, +/// reset `indexed_to`, and continue as if filling a gap. +/// B. Suffix gap fill: write every block from `indexed_to + 1` to +/// `current_canonical_head` into the three index tables. +pub fn run_startup_reconcile

( + db: &ReferenceIndexDb, + provider: &P, + current_head: u64, + max_reorg_depth: u64, +) -> Result<(), ReferenceIndexError> +where + P: BlockReader + + HeaderProvider

+ + BlockHashReader, +{ + let indexed_to = db.indexed_to()?; + + // ── Step A: canonical hash check ──────────────────────────────────────── + let check_start = indexed_to.saturating_sub(max_reorg_depth.saturating_sub(1)); + let mut fork_height: Option = None; + + for number in check_start..=indexed_to { + let indexed_hash = db.indexed_block_hash(number)?; + let canonical_hash = provider.block_hash(number)?; + + if indexed_hash != canonical_hash { + debug!( + target: "morph::reference_index", + number, + ?indexed_hash, + ?canonical_hash, + "canonical hash mismatch during reconcile" + ); + fork_height = Some(number); + break; + } + } + + // ── Step B: apply reorg if detected ────────────────────────────────────── + let rebuild_start = if let Some(fh) = fork_height { + info!( + target: "morph::reference_index", + fork_height = fh, + old_indexed_to = indexed_to, + "offline reorg detected; rolling back index" + ); + + let tx = db.tx_mut()?; + for number in fh..=indexed_to { + delete_block(&tx, number)?; + } + let new_indexed_to = fh.saturating_sub(1); + update_indexed_to(&tx, new_indexed_to)?; + tx.commit()?; + + fh + } else { + indexed_to.saturating_add(1) + }; + + // ── Step C: suffix gap fill ─────────────────────────────────────────────── + if rebuild_start <= current_head { + info!( + target: "morph::reference_index", + rebuild_start, + current_head, + "filling reference index suffix gap" + ); + + let tx = db.tx_mut()?; + for number in rebuild_start..=current_head { + let block = provider + .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? 
+ .ok_or_else(|| { + ReferenceIndexError::Other(eyre::eyre!( + "missing block {number} during reconcile" + )) + })?; + + write_block( + &tx, + block.number(), + block.hash(), + block.timestamp(), + &block.body().transactions, + )?; + } + update_indexed_to(&tx, current_head)?; + tx.commit()?; + } + + Ok(()) +} diff --git a/crates/reference-index/src/tables.rs b/crates/reference-index/src/tables.rs new file mode 100644 index 0000000..c873166 --- /dev/null +++ b/crates/reference-index/src/tables.rs @@ -0,0 +1,399 @@ +//! Reference index table declarations. + +use alloy_primitives::B256; +use reth_db_api::{ + DatabaseError, TableSet, TableType, TableViewer, + table::{Compress, Decode, Decompress, Encode, TableInfo}, + tables, +}; +use serde::{Deserialize, Serialize}; +use std::fmt; + +/// Key for looking up transactions by Morph transaction reference. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)] +pub struct ReferenceIndexKey { + pub reference: B256, + pub block_number: u64, + pub transaction_index: u64, + pub transaction_hash: B256, +} + +impl ReferenceIndexKey { + /// Encoded length: reference(32) + block number(8) + transaction index(8) + hash(32). 
pub const ENCODED_LEN: usize = 80;
}

impl Encode for ReferenceIndexKey {
    type Encoded = [u8; Self::ENCODED_LEN];

    /// Serializes the key as
    /// `reference(32) || block_number(8) || transaction_index(8) || transaction_hash(32)`.
    ///
    /// Both integers are written big-endian so that comparing encoded keys byte-wise orders
    /// them by reference, then block number, then transaction index.
    fn encode(self) -> Self::Encoded {
        let mut encoded = [0u8; Self::ENCODED_LEN];
        encoded[..32].copy_from_slice(self.reference.as_ref());
        encoded[32..40].copy_from_slice(&self.block_number.to_be_bytes());
        encoded[40..48].copy_from_slice(&self.transaction_index.to_be_bytes());
        encoded[48..].copy_from_slice(self.transaction_hash.as_ref());
        encoded
    }
}

impl Decode for ReferenceIndexKey {
    /// Parses a key produced by [`Encode::encode`].
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Decode`] when `value` is not exactly
    /// [`ReferenceIndexKey::ENCODED_LEN`] bytes long.
    fn decode(value: &[u8]) -> Result<Self, DatabaseError> {
        // Reject wrong-sized input up front so the fixed-offset slicing below cannot panic.
        if value.len() != Self::ENCODED_LEN {
            return Err(DatabaseError::Decode);
        }

        Ok(Self {
            reference: B256::new(value[..32].try_into().map_err(|_| DatabaseError::Decode)?),
            block_number: u64::from_be_bytes(
                value[32..40]
                    .try_into()
                    .map_err(|_| DatabaseError::Decode)?,
            ),
            transaction_index: u64::from_be_bytes(
                value[40..48]
                    .try_into()
                    .map_err(|_| DatabaseError::Decode)?,
            ),
            transaction_hash: B256::new(value[48..].try_into().map_err(|_| DatabaseError::Decode)?),
        })
    }
}

/// Key for listing references seen in a block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct BlockReferenceKey {
    /// Number of the block that contains the transaction.
    pub block_number: u64,
    /// Position of the transaction inside that block.
    pub transaction_index: u64,
    /// Hash of the transaction.
    pub transaction_hash: B256,
}

impl BlockReferenceKey {
    /// Encoded length: block number(8) + transaction index(8) + hash(32).
pub const ENCODED_LEN: usize = 48;
}

impl Encode for BlockReferenceKey {
    type Encoded = [u8; Self::ENCODED_LEN];

    /// Serializes the key as `block_number(8) || transaction_index(8) || transaction_hash(32)`.
    ///
    /// Integers are big-endian so that encoded keys sort by block number first, then by
    /// transaction index.
    fn encode(self) -> Self::Encoded {
        let mut encoded = [0u8; Self::ENCODED_LEN];
        encoded[..8].copy_from_slice(&self.block_number.to_be_bytes());
        encoded[8..16].copy_from_slice(&self.transaction_index.to_be_bytes());
        encoded[16..].copy_from_slice(self.transaction_hash.as_ref());
        encoded
    }
}

impl Decode for BlockReferenceKey {
    /// Parses a key produced by [`Encode::encode`].
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Decode`] when `value` is not exactly
    /// [`BlockReferenceKey::ENCODED_LEN`] bytes long.
    fn decode(value: &[u8]) -> Result<Self, DatabaseError> {
        // Length is validated first so the fixed-offset slicing below cannot panic.
        if value.len() != Self::ENCODED_LEN {
            return Err(DatabaseError::Decode);
        }

        Ok(Self {
            block_number: u64::from_be_bytes(
                value[..8].try_into().map_err(|_| DatabaseError::Decode)?,
            ),
            transaction_index: u64::from_be_bytes(
                value[8..16].try_into().map_err(|_| DatabaseError::Decode)?,
            ),
            transaction_hash: B256::new(value[16..].try_into().map_err(|_| DatabaseError::Decode)?),
        })
    }
}

/// Key for tracking indexed canonical blocks.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct IndexedBlockKey {
    /// Number of the indexed block.
    pub block_number: u64,
}

impl IndexedBlockKey {
    /// Encoded length: block number(8).
    pub const ENCODED_LEN: usize = 8;
}

impl Encode for IndexedBlockKey {
    type Encoded = [u8; Self::ENCODED_LEN];

    /// Serializes the block number as eight big-endian bytes.
    fn encode(self) -> Self::Encoded {
        self.block_number.to_be_bytes()
    }
}

impl Decode for IndexedBlockKey {
    /// Parses an eight-byte big-endian block number.
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Decode`] when `value` is not exactly
    /// [`IndexedBlockKey::ENCODED_LEN`] bytes long.
    fn decode(value: &[u8]) -> Result<Self, DatabaseError> {
        if value.len() != Self::ENCODED_LEN {
            return Err(DatabaseError::Decode);
        }

        Ok(Self {
            block_number: u64::from_be_bytes(value.try_into().map_err(|_| DatabaseError::Decode)?),
        })
    }
}

/// Key for reference index metadata entries.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct MetaKey(pub u8);

impl MetaKey {
    /// Encoded length: metadata discriminator(1).
pub const ENCODED_LEN: usize = 1;
}

impl Encode for MetaKey {
    type Encoded = [u8; Self::ENCODED_LEN];

    /// Serializes the key as its single discriminator byte.
    fn encode(self) -> Self::Encoded {
        [self.0]
    }
}

impl Decode for MetaKey {
    /// Parses a one-byte metadata key.
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Decode`] when `value` is not exactly one byte long.
    fn decode(value: &[u8]) -> Result<Self, DatabaseError> {
        match value {
            [key] => Ok(Self(*key)),
            _ => Err(DatabaseError::Decode),
        }
    }
}

/// Block timestamp stored for a reference index hit.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlockTimestampValue(pub u64);

/// Morph transaction reference stored for a block-scoped key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct ReferenceValue(pub B256);

/// Canonical block hash for an indexed block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct BlockHashValue(pub B256);

/// Arbitrary small metadata payload.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MetaValue(pub Vec<u8>);

// NOTE(review): the generic bound on every `compress_to_buf` below was lost from the reviewed
// text and has been restored from the `reth_db_api::table::Compress` trait signature; confirm
// it against the pinned reth revision.

impl Compress for BlockTimestampValue {
    type Compressed = Vec<u8>;

    /// Appends the timestamp to `buf` as eight big-endian bytes.
    fn compress_to_buf<B: reth_codecs::__private::bytes::BufMut + AsMut<[u8]>>(&self, buf: &mut B) {
        reth_codecs::__private::bytes::BufMut::put_slice(buf, &self.0.to_be_bytes());
    }
}

impl Decompress for BlockTimestampValue {
    /// Parses an eight-byte big-endian timestamp.
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Decode`] when `value` is not exactly eight bytes long.
    fn decompress(value: &[u8]) -> Result<Self, DatabaseError> {
        Ok(Self(u64::from_be_bytes(
            value.try_into().map_err(|_| DatabaseError::Decode)?,
        )))
    }
}

/// Implements `Compress` / `Decompress` for a newtype wrapping a [`B256`].
///
/// The value is stored as its raw 32 bytes; decoding any other length yields
/// [`DatabaseError::Decode`].
macro_rules! impl_b256_value_codec {
    ($name:ident) => {
        impl Compress for $name {
            type Compressed = Vec<u8>;

            fn uncompressable_ref(&self) -> Option<&[u8]> {
                Some(self.0.as_ref())
            }

            fn compress_to_buf<B: reth_codecs::__private::bytes::BufMut + AsMut<[u8]>>(
                &self,
                buf: &mut B,
            ) {
                reth_codecs::__private::bytes::BufMut::put_slice(buf, self.0.as_ref());
            }
        }

        impl Decompress for $name {
            fn decompress(value: &[u8]) -> Result<Self, DatabaseError> {
                Ok(Self(B256::new(
                    value.try_into().map_err(|_| DatabaseError::Decode)?,
                )))
            }
        }
    };
}

impl_b256_value_codec!(ReferenceValue);
impl_b256_value_codec!(BlockHashValue);

impl Compress for MetaValue {
    type Compressed = Vec<u8>;

    fn uncompressable_ref(&self) -> Option<&[u8]> {
        Some(&self.0)
    }

    /// Appends the payload bytes to `buf` unchanged.
    fn compress_to_buf<B: reth_codecs::__private::bytes::BufMut + AsMut<[u8]>>(&self, buf: &mut B) {
        reth_codecs::__private::bytes::BufMut::put_slice(buf, &self.0);
    }
}

impl Decompress for MetaValue {
    /// Copies the stored bytes into a new payload; any length is accepted.
    fn decompress(value: &[u8]) -> Result<Self, DatabaseError> {
        Ok(Self(value.to_vec()))
    }

    /// Wraps an already-owned buffer without copying it.
    fn decompress_owned(value: Vec<u8>) -> Result<Self, DatabaseError> {
        Ok(Self(value))
    }
}

tables! {
    /// Maps reference query keys to their block timestamp.
    table ReferenceIndex {
        type Key = ReferenceIndexKey;
        type Value = BlockTimestampValue;
    }

    /// Maps block-scoped transaction keys back to Morph transaction references.
    table BlockReferenceIndex {
        type Key = BlockReferenceKey;
        type Value = ReferenceValue;
    }

    /// Tracks canonical blocks that have been indexed.
    table IndexedBlocks {
        type Key = IndexedBlockKey;
        type Value = BlockHashValue;
    }

    /// Stores reference index metadata.
    table IndexMeta {
        type Key = MetaKey;
        type Value = MetaValue;
    }
}

/// Table set for the Morph transaction reference index database.
+pub use Tables as ReferenceIndexTables; + +#[cfg(test)] +mod tests { + use super::*; + use alloy_primitives::B256; + use reth_db_api::{ + DatabaseError, + table::{Decode, Encode}, + }; + + fn b256(byte: u8) -> B256 { + B256::repeat_byte(byte) + } + + #[test] + fn reference_index_key_sorts_by_reference_then_block_then_tx_index() { + let by_reference = ReferenceIndexKey { + reference: b256(1), + block_number: 10, + transaction_index: 0, + transaction_hash: b256(1), + }; + let by_block = ReferenceIndexKey { + reference: b256(2), + block_number: 9, + transaction_index: 9, + transaction_hash: b256(1), + }; + let by_tx_index = ReferenceIndexKey { + reference: b256(2), + block_number: 10, + transaction_index: 0, + transaction_hash: b256(1), + }; + let later_tx_index = ReferenceIndexKey { + reference: b256(2), + block_number: 10, + transaction_index: 1, + transaction_hash: b256(0), + }; + + let mut encoded = [ + by_block.encode(), + later_tx_index.encode(), + by_reference.encode(), + by_tx_index.encode(), + ]; + encoded.sort(); + + assert_eq!( + encoded, + [ + by_reference.encode(), + by_block.encode(), + by_tx_index.encode(), + later_tx_index.encode(), + ] + ); + } + + #[test] + fn reference_index_key_roundtrip() { + let key = ReferenceIndexKey { + reference: b256(0x11), + block_number: 0x0102_0304_0506_0708, + transaction_index: 0x1112_1314_1516_1718, + transaction_hash: b256(0x22), + }; + + let encoded = key.encode(); + + assert_eq!(encoded.len(), ReferenceIndexKey::ENCODED_LEN); + assert_eq!(ReferenceIndexKey::decode(encoded.as_ref()).unwrap(), key); + } + + #[test] + fn reference_index_key_decode_rejects_wrong_length() { + assert!(matches!( + ReferenceIndexKey::decode(&[0u8; ReferenceIndexKey::ENCODED_LEN - 1]), + Err(DatabaseError::Decode) + )); + assert!(matches!( + ReferenceIndexKey::decode(&[0u8; ReferenceIndexKey::ENCODED_LEN + 1]), + Err(DatabaseError::Decode) + )); + } + + #[test] + fn block_reference_key_sorts_and_roundtrips() { + let by_block = 
BlockReferenceKey { + block_number: 7, + transaction_index: 3, + transaction_hash: b256(1), + }; + let by_tx_index = BlockReferenceKey { + block_number: 8, + transaction_index: 1, + transaction_hash: b256(1), + }; + let later_tx_index = BlockReferenceKey { + block_number: 8, + transaction_index: 2, + transaction_hash: b256(0), + }; + + let mut encoded = [ + later_tx_index.encode(), + by_tx_index.encode(), + by_block.encode(), + ]; + encoded.sort(); + + assert_eq!( + encoded, + [ + by_block.encode(), + by_tx_index.encode(), + later_tx_index.encode(), + ] + ); + assert_eq!( + BlockReferenceKey::decode(by_tx_index.encode().as_ref()).unwrap(), + by_tx_index + ); + } +} diff --git a/crates/reference-index/src/types.rs b/crates/reference-index/src/types.rs new file mode 100644 index 0000000..e28abce --- /dev/null +++ b/crates/reference-index/src/types.rs @@ -0,0 +1,101 @@ +use alloy_primitives::{B256, U64}; +use serde::{Deserialize, Serialize}; + +/// Current reference index database schema version. +pub const SCHEMA_VERSION: u32 = 1; + +/// Stored Jade activation sentinel for chains where Jade has not activated. +pub const JADE_NOT_ACTIVE_SENTINEL: u64 = u64::MAX; + +/// Persistent backfill progress state. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[repr(u8)] +pub enum BackfillState { + NotStarted = 0, + InProgress = 1, + Complete = 2, +} + +impl TryFrom for BackfillState { + type Error = ReferenceIndexError; + + fn try_from(value: u8) -> Result { + match value { + 0 => Ok(Self::NotStarted), + 1 => Ok(Self::InProgress), + 2 => Ok(Self::Complete), + other => Err(ReferenceIndexError::InvalidBackfillState(other)), + } + } +} + +/// Validated query parameters for reference lookups. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct ReferenceQuery { + pub reference: B256, + pub offset: u64, + pub limit: u64, +} + +impl ReferenceQuery { + pub const DEFAULT_LIMIT: u64 = 100; + pub const MAX_LIMIT: u64 = 100; + pub const MAX_OFFSET: u64 = 10_000; + + pub fn new( + reference: B256, + offset: Option, + limit: Option, + ) -> Result { + let offset = offset.unwrap_or_default(); + let limit = limit.unwrap_or(Self::DEFAULT_LIMIT); + + if limit > Self::MAX_LIMIT { + return Err(ReferenceIndexError::LimitTooLarge { limit }); + } + if offset > Self::MAX_OFFSET { + return Err(ReferenceIndexError::OffsetTooLarge { offset }); + } + + Ok(Self { + reference, + offset, + limit, + }) + } +} + +/// RPC result entry for a Morph transaction reference hit. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ReferenceTransactionResult { + pub transaction_hash: B256, + pub block_number: U64, + pub block_timestamp: U64, + pub transaction_index: U64, +} + +/// Errors returned by the reference index. 
+#[derive(Debug, thiserror::Error)] +pub enum ReferenceIndexError { + #[error("reference index initializing")] + Initializing, + #[error("reference index is behind")] + IndexBehind, + #[error("reference query limit too large: {limit}")] + LimitTooLarge { limit: u64 }, + #[error("reference query offset too large: {offset}")] + OffsetTooLarge { offset: u64 }, + #[error("invalid backfill state: {0}")] + InvalidBackfillState(u8), + #[error("reference index chain identity mismatch: {0}")] + ChainIdentityMismatch(&'static str), + #[error("reference index schema mismatch: expected {expected}, got {actual}")] + SchemaMismatch { expected: u32, actual: u32 }, + #[error(transparent)] + Database(#[from] reth_db_api::DatabaseError), + #[error(transparent)] + Provider(#[from] reth_errors::ProviderError), + #[error(transparent)] + Other(#[from] eyre::Report), +} diff --git a/crates/reference-index/src/writer.rs b/crates/reference-index/src/writer.rs new file mode 100644 index 0000000..f275c41 --- /dev/null +++ b/crates/reference-index/src/writer.rs @@ -0,0 +1,285 @@ +//! Reference index write path helpers. +//! +//! All write functions take an already-open write transaction so that the +//! caller can batch multiple blocks (backfill) or delete+write (reorg) in a +//! single atomic commit. + +use crate::{ + db::{IndexMetaKey, encode_b256, encode_u64}, + tables::{ + BlockHashValue, BlockReferenceIndex, BlockReferenceKey, BlockTimestampValue, + IndexMeta, IndexedBlockKey, IndexedBlocks, MetaValue, ReferenceIndex, ReferenceIndexKey, + ReferenceValue, + }, + types::{BackfillState, ReferenceIndexError}, +}; +use alloy_consensus::transaction::TxHashRef; +use alloy_primitives::B256; +use morph_primitives::MorphTxEnvelope; +use reth_db_api::{cursor::DbCursorRO, transaction::DbTxMut}; + +/// Index one canonical block into all three data tables. +/// +/// Returns the number of reference entries written. 
This function is +/// idempotent: re-writing the same block overwrites previously stored rows +/// with the same canonical values. +pub fn write_block( + tx: &Tx, + block_number: u64, + block_hash: B256, + block_timestamp: u64, + transactions: &[MorphTxEnvelope], +) -> Result { + tx.put::(IndexedBlockKey { block_number }, BlockHashValue(block_hash))?; + + let mut written = 0u64; + for (idx, transaction) in transactions.iter().enumerate() { + let Some(reference) = transaction.reference() else { + continue; + }; + let transaction_index = idx as u64; + let transaction_hash = *transaction.tx_hash(); + + let reference_key = ReferenceIndexKey { + reference, + block_number, + transaction_index, + transaction_hash, + }; + tx.put::(reference_key, BlockTimestampValue(block_timestamp))?; + tx.put::( + BlockReferenceKey { + block_number, + transaction_index, + transaction_hash, + }, + ReferenceValue(reference), + )?; + written += 1; + } + + if written > 0 { + crate::metrics::increment_entries(written); + } + Ok(written) +} + +/// Delete all reference-index state for a single block number. +/// +/// Implements the reverse of [`write_block`]: reads every +/// `BlockReferenceIndex` row for `block_number`, reconstructs each +/// `ReferenceIndex` key, and removes entries from all three tables. +pub fn delete_block( + tx: &Tx, + block_number: u64, +) -> Result<(), ReferenceIndexError> { + // 1. Collect all BlockReferenceIndex rows for this block. + let mut entries = Vec::new(); + { + let mut cursor = tx.cursor_write::()?; + let start = BlockReferenceKey { + block_number, + transaction_index: 0, + transaction_hash: B256::ZERO, + }; + let mut next = cursor.seek(start)?; + while let Some((key, value)) = next { + if key.block_number != block_number { + break; + } + entries.push((key, value.0)); + next = cursor.next()?; + } + } + + // 2. Delete each ReferenceIndex + BlockReferenceIndex row. 
+ for (key, reference) in entries { + tx.delete::( + ReferenceIndexKey { + reference, + block_number: key.block_number, + transaction_index: key.transaction_index, + transaction_hash: key.transaction_hash, + }, + None, + )?; + tx.delete::(key, None)?; + } + + // 3. Remove the IndexedBlocks row. + tx.delete::(IndexedBlockKey { block_number }, None)?; + Ok(()) +} + +// ── metadata writes ────────────────────────────────────────────────────────── + +pub fn update_indexed_to( + tx: &Tx, + block_number: u64, +) -> Result<(), ReferenceIndexError> { + tx.put::(IndexMetaKey::IndexedTo.into(), encode_u64(block_number))?; + Ok(()) +} + +pub fn update_indexed_from( + tx: &Tx, + block_number: u64, +) -> Result<(), ReferenceIndexError> { + tx.put::(IndexMetaKey::IndexedFrom.into(), encode_u64(block_number))?; + Ok(()) +} + +pub fn update_backfill_current( + tx: &Tx, + block_number: u64, +) -> Result<(), ReferenceIndexError> { + tx.put::( + IndexMetaKey::BackfillCurrent.into(), + encode_u64(block_number), + )?; + Ok(()) +} + +pub fn set_backfill_state( + tx: &Tx, + state: BackfillState, +) -> Result<(), ReferenceIndexError> { + tx.put::( + IndexMetaKey::BackfillState.into(), + MetaValue(vec![state as u8]), + )?; + crate::metrics::set_backfill_state(state as u8); + Ok(()) +} + +pub fn set_jade_first_block_number( + tx: &Tx, + block_number: u64, +) -> Result<(), ReferenceIndexError> { + tx.put::( + IndexMetaKey::JadeFirstBlockNumber.into(), + encode_u64(block_number), + )?; + Ok(()) +} + +pub fn set_snapshot_block( + tx: &Tx, + block_number: u64, + block_hash: B256, +) -> Result<(), ReferenceIndexError> { + tx.put::( + IndexMetaKey::SnapshotBlockNumber.into(), + encode_u64(block_number), + )?; + tx.put::( + IndexMetaKey::SnapshotBlockHash.into(), + encode_b256(block_hash), + )?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ReferenceIndexDb; + use alloy_consensus::transaction::TxEip1559; + use alloy_primitives::{Address, Signature, TxKind, U256}; + use 
morph_primitives::MorphTxEnvelope; + use reth_db_api::transaction::DbTx; + use tempfile::TempDir; + + fn unsigned_sig() -> Signature { + Signature::new(U256::from(1u64), U256::from(1u64), false) + } + + /// A MorphTx wrapper that returns `Some(reference)` when queried. We + /// don't have a simple TxMorph factory in this crate, so we fabricate + /// a test envelope by using the Eip1559 variant and pairing the expected + /// hash via the trait impl. + /// + /// For now we test write_block/delete_block indirectly by checking that + /// `write_block` writes the `IndexedBlocks` row for blocks without any + /// reference-carrying tx, and `delete_block` removes it. + fn tx_without_reference() -> MorphTxEnvelope { + let tx = TxEip1559 { + chain_id: 2818, + nonce: 0, + gas_limit: 21_000, + max_fee_per_gas: 1, + max_priority_fee_per_gas: 0, + to: TxKind::Call(Address::ZERO), + value: U256::ZERO, + access_list: Default::default(), + input: Default::default(), + }; + let signed = alloy_consensus::Signed::new_unchecked(tx, unsigned_sig(), B256::ZERO); + MorphTxEnvelope::Eip1559(signed) + } + + #[test] + fn write_block_without_reference_tx_writes_indexed_blocks_row() { + let dir = TempDir::new().unwrap(); + let db = ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + + let txs = vec![tx_without_reference()]; + let tx = db.tx_mut().unwrap(); + let written = write_block(&tx, 42, B256::repeat_byte(0xaa), 1_000, &txs).unwrap(); + update_indexed_to(&tx, 42).unwrap(); + tx.commit().unwrap(); + + assert_eq!(written, 0); + assert_eq!(db.indexed_to().unwrap(), 42); + assert_eq!( + db.indexed_block_hash(42).unwrap(), + Some(B256::repeat_byte(0xaa)) + ); + } + + #[test] + fn delete_block_removes_indexed_blocks_row() { + let dir = TempDir::new().unwrap(); + let db = ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + + let tx = db.tx_mut().unwrap(); + write_block(&tx, 7, B256::repeat_byte(0x11), 100, &[]).unwrap(); + update_indexed_to(&tx, 7).unwrap(); + 
tx.commit().unwrap(); + assert_eq!(db.indexed_block_hash(7).unwrap(), Some(B256::repeat_byte(0x11))); + + let tx = db.tx_mut().unwrap(); + delete_block(&tx, 7).unwrap(); + tx.commit().unwrap(); + assert_eq!(db.indexed_block_hash(7).unwrap(), None); + } + + #[test] + fn metadata_updates_are_visible_after_commit() { + let dir = TempDir::new().unwrap(); + let db = ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); + + let tx = db.tx_mut().unwrap(); + update_indexed_from(&tx, 100).unwrap(); + update_indexed_to(&tx, 200).unwrap(); + update_backfill_current(&tx, 150).unwrap(); + set_backfill_state(&tx, BackfillState::Complete).unwrap(); + set_jade_first_block_number(&tx, 100).unwrap(); + tx.commit().unwrap(); + + assert_eq!(db.indexed_from().unwrap(), Some(100)); + assert_eq!(db.indexed_to().unwrap(), 200); + assert_eq!(db.backfill_state().unwrap(), BackfillState::Complete); + assert_eq!(db.jade_first_block_number().unwrap(), Some(100)); + + // Sanity: a fresh read transaction also sees the backfill_current we set. + let tx = db.tx().unwrap(); + let raw = tx + .get::(IndexMetaKey::BackfillCurrent.into()) + .unwrap() + .unwrap(); + assert_eq!( + u64::from_be_bytes(raw.0.as_slice().try_into().unwrap()), + 150 + ); + } +} From b1dbc92cc4bc55cca25ba567e03930711f909053 Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 15:47:47 +0800 Subject: [PATCH 02/15] feat(rpc): wire morph_getTransactionHashesByReference RPC + ExEx Implements the RPC namespace, ExEx, and node integration that back the new reference index storage layer added in the previous commit. 
- morph-rpc: new morph_ namespace with getTransactionHashesByReference - returns -32000 "reference index initializing" before Task A completes - returns -32000 "reference index is behind" when lag exceeds threshold - validates limit/offset bounds with -32602 - morph-node: ReferenceIndexControl (watch-channel-coordinated) wires the startup indexing task (Task A) with the ExEx (Task B) - Task A: maybe_reset_jade_sentinel -> backfill -> reconcile -> is_ready=true -> startup FinishedHeight - Task B: drains notifications from node launch, gates writes on is_ready, gap-fills from main DB on first is_ready notification, processes ChainCommitted/Reverted/Reorged with three-table atomic writes, sends FinishedHeight(BlockNumHash) - MorphAddOns: optional reference_index control; when present spawns Task A via task_executor and registers the morph_ RPC handler - bin/morph-reth: opens reference index DB under /morph/reference_index, installs the ExEx, and injects the control into MorphAddOns --- Cargo.lock | 8 + Cargo.toml | 3 + bin/morph-reth/Cargo.toml | 2 + bin/morph-reth/src/main.rs | 39 ++- crates/node/Cargo.toml | 14 +- crates/node/src/add_ons.rs | 57 ++++- crates/node/src/exex/mod.rs | 7 + crates/node/src/exex/reference_index.rs | 304 ++++++++++++++++++++++++ crates/node/src/lib.rs | 1 + crates/rpc/Cargo.toml | 1 + crates/rpc/src/lib.rs | 2 + crates/rpc/src/morph/handler.rs | 86 +++++++ crates/rpc/src/morph/mod.rs | 10 + crates/rpc/src/morph/rpc.rs | 42 ++++ 14 files changed, 567 insertions(+), 9 deletions(-) create mode 100644 crates/node/src/exex/mod.rs create mode 100644 crates/node/src/exex/reference_index.rs create mode 100644 crates/rpc/src/morph/handler.rs create mode 100644 crates/rpc/src/morph/mod.rs create mode 100644 crates/rpc/src/morph/rpc.rs diff --git a/Cargo.lock b/Cargo.lock index acdcb32..a5c6c3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4843,15 +4843,18 @@ dependencies = [ "morph-payload-builder", "morph-payload-types", "morph-primitives", + 
"morph-reference-index", "morph-rpc", "morph-txpool", "parking_lot", "reth-chainspec", "reth-db", + "reth-db-api", "reth-e2e-test-utils", "reth-engine-local", "reth-engine-tree", "reth-errors", + "reth-exex", "reth-node-api", "reth-node-builder", "reth-node-core", @@ -4862,6 +4865,7 @@ dependencies = [ "reth-provider", "reth-rpc-builder", "reth-rpc-eth-api", + "reth-storage-api", "reth-tasks", "reth-tracing", "reth-transaction-pool", @@ -4870,6 +4874,7 @@ dependencies = [ "serde_json", "tokio", "tokio-stream", + "tracing", "vergen", "vergen-git2", ] @@ -4981,6 +4986,8 @@ dependencies = [ "morph-consensus", "morph-evm", "morph-node", + "morph-reference-index", + "reth-chainspec", "reth-cli", "reth-cli-util", "reth-ethereum-cli", @@ -5028,6 +5035,7 @@ dependencies = [ "morph-chainspec", "morph-evm", "morph-primitives", + "morph-reference-index", "morph-revm", "reth-chainspec", "reth-errors", diff --git a/Cargo.toml b/Cargo.toml index 55f86e5..328097b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,6 +74,9 @@ reth-engine-primitives = { git = "https://github.com/morph-l2/reth", rev = "1b07 reth-engine-tree = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } reth-errors = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } reth-eth-wire-types = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } +reth-exex = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } +reth-exex-test-utils = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } +reth-exex-types = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } reth-ethereum = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } reth-ethereum-cli = { git = "https://github.com/morph-l2/reth", rev = 
"1b0702546633c259306017717b2938f14adfe329" } reth-ethereum-consensus = { git = "https://github.com/morph-l2/reth", rev = "1b0702546633c259306017717b2938f14adfe329" } diff --git a/bin/morph-reth/Cargo.toml b/bin/morph-reth/Cargo.toml index 4512cf2..cf18741 100644 --- a/bin/morph-reth/Cargo.toml +++ b/bin/morph-reth/Cargo.toml @@ -20,8 +20,10 @@ morph-chainspec = { workspace = true, features = ["cli"] } morph-consensus.workspace = true morph-evm.workspace = true morph-node.workspace = true +morph-reference-index.workspace = true # Reth CLI +reth-chainspec.workspace = true reth-cli.workspace = true reth-cli-util.workspace = true reth-ethereum-cli.workspace = true diff --git a/bin/morph-reth/src/main.rs b/bin/morph-reth/src/main.rs index 624da7f..b416626 100644 --- a/bin/morph-reth/src/main.rs +++ b/bin/morph-reth/src/main.rs @@ -7,9 +7,15 @@ use clap::Parser; use morph_chainspec::{MorphChainSpec, MorphChainSpecParser}; use morph_consensus::MorphConsensus; use morph_evm::{MorphEvmConfig, evm::MorphEvmFactory}; -use morph_node::{MorphArgs, MorphNode}; +use morph_node::{ + MorphAddOns, MorphArgs, MorphNode, + exex::{ReferenceIndexControl, reference_index_exex}, +}; +use morph_reference_index::ReferenceIndexDb; +use reth_chainspec::EthChainSpec; use reth_cli_util::sigsegv_handler; use reth_ethereum_cli::Cli; +use reth_node_builder::Node; use reth_rpc_server_types::DefaultRpcModuleValidator; use std::sync::Arc; use tracing::info; @@ -43,8 +49,37 @@ fn main() { .run_with_components::(components, async move |builder, morph_args| { info!(target: "morph::cli", "Starting Morph-Reth node"); + // Open the reference index DB before launching the node so we + // can wire it into both the ExEx and the add-ons. 
+ let chain_spec = builder.config().chain.clone(); + let datadir = builder.config().datadir(); + let reference_index_path = + datadir.data_dir().join("morph").join("reference_index"); + let chain_id = chain_spec.chain().id(); + let genesis_hash = chain_spec.genesis_hash(); // from EthChainSpec trait + + info!( + target: "morph::reference_index", + path = %reference_index_path.display(), + chain_id, + "opening Morph reference index database" + ); + let db = ReferenceIndexDb::open(&reference_index_path, chain_id, genesis_hash)?; + let (control, startup_rx) = ReferenceIndexControl::new(db); + + let exex_control = control.clone(); + let node = MorphNode::new(morph_args); + let handle = builder - .node(MorphNode::new(morph_args)) + .with_types::() + .with_components(node.components_builder()) + .with_add_ons(MorphAddOns::new().with_reference_index(control)) + .install_exex( + "morph-reference-index", + async move |ctx| { + Ok(reference_index_exex(ctx, exex_control, startup_rx)) + }, + ) .launch_with_debug_capabilities() .await?; diff --git a/crates/node/Cargo.toml b/crates/node/Cargo.toml index 93842b5..34ec2fe 100644 --- a/crates/node/Cargo.toml +++ b/crates/node/Cargo.toml @@ -18,16 +18,19 @@ morph-evm.workspace = true morph-payload-builder.workspace = true morph-payload-types.workspace = true morph-primitives = { workspace = true, features = ["reth-codec"] } +morph-reference-index.workspace = true morph-rpc.workspace = true morph-txpool.workspace = true # Reth dependencies reth-db.workspace = true +reth-db-api.workspace = true reth-node-core.workspace = true reth-chainspec.workspace = true reth-engine-local.workspace = true reth-engine-tree.workspace = true reth-errors.workspace = true +reth-exex.workspace = true reth-node-api.workspace = true reth-node-builder.workspace = true reth-node-ethereum.workspace = true @@ -37,12 +40,15 @@ reth-primitives-traits.workspace = true reth-provider.workspace = true reth-rpc-builder.workspace = true reth-rpc-eth-api.workspace = 
true +reth-storage-api.workspace = true +reth-tasks.workspace = true reth-transaction-pool.workspace = true reth-tracing.workspace = true reth-trie.workspace = true # Alloy alloy-consensus.workspace = true +alloy-eips.workspace = true alloy-genesis.workspace = true alloy-hardforks.workspace = true alloy-primitives.workspace = true @@ -54,16 +60,15 @@ eyre.workspace = true clap.workspace = true dashmap.workspace = true parking_lot.workspace = true +tokio = { workspace = true, features = ["sync", "rt"] } tokio-stream.workspace = true +tracing.workspace = true serde = { workspace = true, features = ["derive"] } serde_json.workspace = true # Optional: E2E testing framework reth-e2e-test-utils = { workspace = true, optional = true } -reth-tasks = { workspace = true, optional = true } -tokio = { workspace = true, features = ["sync"], optional = true } -alloy-eips = { workspace = true, optional = true } alloy-rlp = { workspace = true, optional = true } alloy-signer = { workspace = true, optional = true } alloy-signer-local = { workspace = true, optional = true } @@ -98,10 +103,7 @@ required-features = ["test-utils"] default = [] test-utils = [ "dep:reth-e2e-test-utils", - "dep:reth-tasks", "dep:alloy-signer", - "dep:tokio", - "dep:alloy-eips", "dep:alloy-rlp", "dep:alloy-signer-local", ] diff --git a/crates/node/src/add_ons.rs b/crates/node/src/add_ons.rs index e2b84b2..c1f9e24 100644 --- a/crates/node/src/add_ons.rs +++ b/crates/node/src/add_ons.rs @@ -2,11 +2,16 @@ use crate::{ MorphNode, + exex::ReferenceIndexControl, validator::{MorphEngineValidatorBuilder, MorphTreeEngineValidatorBuilder}, }; use morph_evm::MorphEvmConfig; use morph_primitives::{Block, MorphHeader, MorphReceipt}; -use morph_rpc::{MorphEthApiBuilder, MorphEthConfigApiServer, MorphEthConfigHandler}; +use morph_reference_index::{DEFAULT_LAG_THRESHOLD, ReferenceIndexReader}; +use morph_rpc::{ + MorphEthApiBuilder, MorphEthConfigApiServer, MorphEthConfigHandler, + morph::{MorphRpc, MorphRpcHandler, 
MorphRpcServer}, +}; use reth_node_api::{AddOnsContext, FullNodeComponents, FullNodeTypes, NodeAddOns, NodePrimitives}; use reth_node_builder::{ NodeAdapter, @@ -39,6 +44,10 @@ pub struct MorphAddOns< > { /// Inner RPC add-ons from reth. inner: RpcAddOns, + /// Optional reference-index control injected by `main.rs`. When present + /// the add-on spawns startup indexing on launch and registers the + /// `morph_` RPC namespace. + reference_index: Option, } impl MorphAddOns, MorphEthApiBuilder> @@ -59,8 +68,16 @@ where MorphTreeEngineValidatorBuilder::new(pvb), Identity::default(), ), + reference_index: None, } } + + /// Attach a reference index control so the add-on can spawn startup + /// indexing and register the `morph_` RPC namespace on launch. + pub fn with_reference_index(mut self, control: ReferenceIndexControl) -> Self { + self.reference_index = Some(control); + self + } } impl Default for MorphAddOns, MorphEthApiBuilder> @@ -75,6 +92,7 @@ where } } + impl NodeAddOns for MorphAddOns where N: FullNodeComponents, @@ -116,6 +134,34 @@ where } }); + // Spawn reference index startup indexing (Task A) if configured. 
+ let reference_rpc_handler = if let Some(control) = self.reference_index { + let startup_control = control.clone(); + let startup_node = ctx.node.clone(); + task_executor.spawn_critical("morph reference index startup", async move { + if let Err(err) = tokio::task::spawn_blocking(move || { + crate::exex::run_startup_indexing(&startup_node, &startup_control) + }) + .await + .unwrap_or_else(|e| Err(eyre::eyre!("panic: {e}"))) + { + tracing::error!( + target: "morph::reference_index", + ?err, + "reference index startup indexing failed" + ); + } + }); + + let morph_rpc_ctx = MorphRpc::new( + ReferenceIndexReader::new(control.db.clone(), DEFAULT_LAG_THRESHOLD), + provider.clone(), + ); + Some(MorphRpcHandler::new(morph_rpc_ctx)) + } else { + None + }; + // Use launch_add_ons_with to register custom Engine API and eth_config self.inner .launch_add_ons_with(ctx, move |container| { @@ -133,6 +179,15 @@ where .map_err(|e| eyre::eyre!("Failed to register eth_config handler: {}", e))?; tracing::info!(target: "morph::node", "Morph eth_config handler registered successfully"); + // Register morph_ RPC namespace (if reference index was configured). + if let Some(handler) = reference_rpc_handler { + tracing::debug!(target: "morph::node", "Registering morph_ RPC namespace"); + modules + .merge_configured(handler.into_rpc()) + .map_err(|e| eyre::eyre!("Failed to register morph_ RPC: {}", e))?; + tracing::info!(target: "morph::node", "morph_ RPC namespace registered"); + } + // Create and register Morph L2 Engine API tracing::debug!(target: "morph::node", "Registering Morph L2 Engine API"); diff --git a/crates/node/src/exex/mod.rs b/crates/node/src/exex/mod.rs new file mode 100644 index 0000000..045210c --- /dev/null +++ b/crates/node/src/exex/mod.rs @@ -0,0 +1,7 @@ +//! Morph-specific Execution Extensions (ExEx). 
+ +pub mod reference_index; + +pub use reference_index::{ + ReferenceIndexControl, reference_index_exex, run_startup_indexing, +}; diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs new file mode 100644 index 0000000..15e2fd8 --- /dev/null +++ b/crates/node/src/exex/reference_index.rs @@ -0,0 +1,304 @@ +//! Reference index ExEx: drains canonical chain notifications and keeps the +//! index incrementally up to date. +//! +//! ## Lifecycle +//! +//! **Task A** (`run_startup_indexing`): runs in a spawned task, executes +//! backfill → reconcile, then sets `is_ready = true` and signals the ExEx with +//! `FinishedHeight(indexed_to)`. +//! +//! **Task B** (`reference_index_exex`): registered via `install_exex` and +//! started by reth's framework at node launch. It drains all notifications +//! immediately to avoid backpressure; writes are gated behind `is_ready`. + +use alloy_consensus::BlockHeader; +use alloy_eips::BlockNumHash; +use morph_chainspec::spec::MorphChainSpec; +use morph_primitives::MorphPrimitives; +use morph_reference_index::{ + DEFAULT_BACKFILL_BATCH_BLOCKS, DEFAULT_MAX_REORG_DEPTH, ReferenceIndexDb, + backfill::{maybe_reset_jade_sentinel, run_backfill}, + reconcile::run_startup_reconcile, + writer::{delete_block, update_indexed_to, write_block}, +}; +use reth_db_api::transaction::DbTx; +use reth_exex::{ExExContext, ExExEvent, ExExNotification}; +use reth_node_api::{FullNodeComponents, NodeTypes}; +use reth_provider::{BlockHashReader, BlockReader, BlockNumReader, ChainSpecProvider, HeaderProvider}; +use reth_storage_api::TransactionVariant; +use tokio::sync::watch; +use tokio_stream::StreamExt; +use tracing::{debug, error, info}; + +// ── shared control ──────────────────────────────────────────────────────────── + +/// Shared handle that connects Task A (startup indexing) with Task B (ExEx). +/// +/// Task A completes, sets `is_ready`, then sends the startup +/// `FinishedHeight` through the watch channel. 
Task B receives this and +/// forwards it to reth's ExEx event bus. +#[derive(Clone, Debug)] +pub struct ReferenceIndexControl { + pub db: ReferenceIndexDb, + startup_tx: watch::Sender>, +} + +impl ReferenceIndexControl { + /// Create a new control pair. + /// + /// Returns `(control, receiver)`. The receiver must be passed to + /// [`reference_index_exex`] so the ExEx knows when startup has finished. + pub fn new(db: ReferenceIndexDb) -> (Self, watch::Receiver>) { + let (startup_tx, startup_rx) = watch::channel(None); + (Self { db, startup_tx }, startup_rx) + } + + /// Called by Task A after backfill + reconcile complete. + pub fn mark_startup_finished(&self, block: BlockNumHash) -> eyre::Result<()> { + self.startup_tx.send(Some(block))?; + Ok(()) + } +} + +// ── Task B: ExEx ────────────────────────────────────────────────────────────── + +/// Main ExEx loop. +/// +/// Drains notifications from node launch to avoid backpressure. While +/// `is_ready = false` each notification is discarded. After `is_ready` +/// the first notification triggers a gap check before normal processing. +pub async fn reference_index_exex( + mut ctx: ExExContext, + control: ReferenceIndexControl, + mut startup_rx: watch::Receiver>, +) -> eyre::Result<()> +where + Node: FullNodeComponents< + Types: NodeTypes< + Primitives = MorphPrimitives, + ChainSpec = MorphChainSpec, + >, + >, + Node::Provider: BlockReader + + BlockNumReader + + HeaderProvider
+ + BlockHashReader, +{ + let mut first_ready = true; + + loop { + tokio::select! { + // Forward the startup FinishedHeight when Task A finishes. + changed = startup_rx.changed() => { + if changed.is_ok() { + if let Some(block) = *startup_rx.borrow_and_update() { + debug!( + target: "morph::reference_index", + block_number = block.number, + "startup complete; sending initial FinishedHeight" + ); + ctx.events.send(ExExEvent::FinishedHeight(block))?; + } + } + } + + maybe_notification = ctx.notifications.try_next() => { + let Some(notification) = maybe_notification? else { break; }; + + if !control.db.is_ready() { + // Drain without writing to avoid backpressure. + if let Some(chain) = notification.committed_chain() { + debug!( + target: "morph::reference_index", + tip = chain.tip().number(), + "drained notification while index initializing" + ); + } + continue; + } + + // Gap check on first is_ready notification. + if first_ready { + first_ready = false; + if let Some(chain) = notification.committed_chain() { + let indexed_to = control.db.indexed_to()?; + let notif_start = chain.first().number(); + if notif_start > indexed_to + 1 { + fill_gap( + &control.db, + &ctx.components.provider().clone(), + indexed_to + 1, + notif_start - 1, + )?; + } + } + } + + if let Err(e) = + handle_notification(&ctx.events, &control.db, notification) + { + error!(target: "morph::reference_index", ?e, "error processing notification"); + return Err(e); + } + } + } + } + + Ok(()) +} + +/// Fill index entries from main DB for the gap `[from, to]`. 
+fn fill_gap( + db: &ReferenceIndexDb, + provider: &Provider, + from: u64, + to: u64, +) -> eyre::Result<()> +where + Provider: BlockReader, +{ + if from > to { + return Ok(()); + } + info!( + target: "morph::reference_index", + from, to, + "filling gap between startup reconcile and first ExEx notification" + ); + let tx = db.tx_mut()?; + for number in from..=to { + let block = provider + .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? + .ok_or_else(|| eyre::eyre!("missing block {number} during gap fill"))?; + write_block(&tx, block.number(), block.hash(), block.timestamp(), &block.body().transactions)?; + } + update_indexed_to(&tx, to)?; + tx.commit()?; + Ok(()) +} + +/// Process one ExEx notification: commit or revert three tables atomically. +fn handle_notification( + events: &tokio::sync::mpsc::UnboundedSender, + db: &ReferenceIndexDb, + notification: ExExNotification, +) -> eyre::Result<()> { + match notification { + ExExNotification::ChainCommitted { new } => { + let tx = db.tx_mut()?; + for block in new.blocks_iter() { + write_block( + &tx, + block.number(), + block.hash(), + block.timestamp(), + &block.body().transactions, + )?; + } + update_indexed_to(&tx, new.tip().number())?; + tx.commit()?; + events.send(ExExEvent::FinishedHeight(new.tip().num_hash()))?; + } + ExExNotification::ChainReverted { old } => { + let parent = old.first().number().saturating_sub(1); + let tx = db.tx_mut()?; + for block in old.blocks_iter() { + delete_block(&tx, block.number())?; + } + update_indexed_to(&tx, parent)?; + tx.commit()?; + // FinishedHeight not sent on revert per spec. 
+ } + ExExNotification::ChainReorged { old, new } => { + let tx = db.tx_mut()?; + for block in old.blocks_iter() { + delete_block(&tx, block.number())?; + } + for block in new.blocks_iter() { + write_block( + &tx, + block.number(), + block.hash(), + block.timestamp(), + &block.body().transactions, + )?; + } + update_indexed_to(&tx, new.tip().number())?; + tx.commit()?; + events.send(ExExEvent::FinishedHeight(new.tip().num_hash()))?; + } + } + Ok(()) +} + +// ── Task A: startup indexing ────────────────────────────────────────────────── + +/// Execute backfill → reconcile, set `is_ready = true`, then send the startup +/// `FinishedHeight` through `control`. +/// +/// Call once from a spawned task after the node's provider is available. +pub fn run_startup_indexing( + node: &Node, + control: &ReferenceIndexControl, +) -> eyre::Result<()> +where + Node: FullNodeComponents< + Types: NodeTypes< + Primitives = MorphPrimitives, + ChainSpec = MorphChainSpec, + >, + >, + Node::Provider: BlockReader + + BlockNumReader + + HeaderProvider
+ + BlockHashReader + + ChainSpecProvider, +{ + let provider = node.provider().clone(); + let chain_spec = provider.chain_spec(); + let head = provider.best_block_number()?; + + // Re-resolve jade sentinel if Jade has since activated. + maybe_reset_jade_sentinel(&control.db, &provider, chain_spec.as_ref(), head)?; + + // Run backfill (no-op if already Complete). + run_backfill( + &control.db, + &provider, + chain_spec.as_ref(), + head, + DEFAULT_BACKFILL_BATCH_BLOCKS, + )?; + + // Re-read head in case new blocks arrived during backfill. + let current_head = provider.best_block_number()?; + + // Startup reconcile: canonical hash check + suffix gap. + run_startup_reconcile( + &control.db, + &provider, + current_head, + DEFAULT_MAX_REORG_DEPTH, + )?; + + // Atomically mark ready and signal the ExEx. + control.db.set_ready(true); + + let indexed_to = control.db.indexed_to()?; + let hash = provider + .block_hash(indexed_to)? + .ok_or_else(|| eyre::eyre!("missing canonical hash for block {indexed_to}"))?; + + control.mark_startup_finished(BlockNumHash { + number: indexed_to, + hash, + })?; + + info!( + target: "morph::reference_index", + indexed_to, + "reference index ready" + ); + + Ok(()) +} diff --git a/crates/node/src/lib.rs b/crates/node/src/lib.rs index 399c8a5..70a0159 100644 --- a/crates/node/src/lib.rs +++ b/crates/node/src/lib.rs @@ -22,6 +22,7 @@ pub mod add_ons; pub mod args; pub mod components; +pub mod exex; pub mod node; #[cfg(feature = "test-utils")] pub mod test_utils; diff --git a/crates/rpc/Cargo.toml b/crates/rpc/Cargo.toml index 0d2feb6..261abe3 100644 --- a/crates/rpc/Cargo.toml +++ b/crates/rpc/Cargo.toml @@ -13,6 +13,7 @@ workspace = true # Morph crates morph-primitives = { workspace = true, features = ["serde-bincode-compat", "reth-codec"] } morph-chainspec.workspace = true +morph-reference-index.workspace = true morph-revm = { workspace = true, features = ["rpc"] } morph-evm = { workspace = true, features = ["rpc"] } diff --git 
a/crates/rpc/src/lib.rs b/crates/rpc/src/lib.rs index 1acc122..e7efe37 100644 --- a/crates/rpc/src/lib.rs +++ b/crates/rpc/src/lib.rs @@ -5,9 +5,11 @@ pub mod error; pub mod eth; pub mod eth_config; +pub mod morph; pub mod types; pub use error::MorphEthApiError; pub use eth::{MorphEthApi, MorphEthApiBuilder, MorphRpcConverter, MorphRpcTypes}; pub use eth_config::{MorphEthConfigApiServer, MorphEthConfigHandler}; +pub use morph::{MorphRpc, MorphRpcHandler, MorphRpcServer, ReferenceQueryArgs}; pub use types::*; diff --git a/crates/rpc/src/morph/handler.rs b/crates/rpc/src/morph/handler.rs new file mode 100644 index 0000000..d933867 --- /dev/null +++ b/crates/rpc/src/morph/handler.rs @@ -0,0 +1,86 @@ +//! `MorphRpc` handler implementation. + +use crate::morph::rpc::{MorphRpcServer, ReferenceQueryArgs}; +use jsonrpsee::{ + core::RpcResult, + types::{ErrorCode, ErrorObjectOwned}, +}; +use morph_reference_index::{ + ReferenceIndexError, ReferenceIndexReader, ReferenceQuery, ReferenceTransactionResult, +}; +use reth_storage_api::BlockNumReader; + +// ── Context ────────────────────────────────────────────────────────────────── + +/// `morph_` namespace context. All dependencies are required; no `Option<>`. +/// +/// `Provider` must implement [`BlockNumReader`] so the handler can compare the +/// current canonical tip against the index's `indexed_to` for lag detection. +#[derive(Debug, Clone)] +pub struct MorphRpc { + pub reference_index: ReferenceIndexReader, + pub provider: Provider, +} + +impl MorphRpc { + pub const fn new(reference_index: ReferenceIndexReader, provider: Provider) -> Self { + Self { + reference_index, + provider, + } + } +} + +// ── Handler ─────────────────────────────────────────────────────────────────── + +/// Handler that wraps [`MorphRpc`] and implements the jsonrpsee server trait. 
+#[derive(Debug, Clone)] +pub struct MorphRpcHandler { + ctx: MorphRpc, +} + +impl MorphRpcHandler { + pub const fn new(ctx: MorphRpc) -> Self { + Self { ctx } + } +} + +impl MorphRpcServer + for MorphRpcHandler +{ + fn get_transaction_hashes_by_reference( + &self, + args: ReferenceQueryArgs, + ) -> RpcResult> { + let query = + ReferenceQuery::new(args.reference, args.offset, args.limit).map_err(to_rpc_error)?; + + let canonical_tip = self + .ctx + .provider + .best_block_number() + .map_err(|e| to_rpc_error(ReferenceIndexError::Other(eyre::eyre!(e))))?; + + self.ctx + .reference_index + .query(query, canonical_tip) + .map_err(to_rpc_error) + } +} + +// ── error mapping ───────────────────────────────────────────────────────────── + +fn to_rpc_error(error: ReferenceIndexError) -> ErrorObjectOwned { + match error { + ReferenceIndexError::Initializing => { + ErrorObjectOwned::owned(-32000, "reference index initializing", None::<()>) + } + ReferenceIndexError::IndexBehind => { + ErrorObjectOwned::owned(-32000, "reference index is behind", None::<()>) + } + ReferenceIndexError::LimitTooLarge { .. } | ReferenceIndexError::OffsetTooLarge { .. } => { + ErrorObjectOwned::owned(ErrorCode::InvalidParams.code(), error.to_string(), None::<()>) + } + other => ErrorObjectOwned::owned(-32000, other.to_string(), None::<()>), + } +} diff --git a/crates/rpc/src/morph/mod.rs b/crates/rpc/src/morph/mod.rs new file mode 100644 index 0000000..f0ac295 --- /dev/null +++ b/crates/rpc/src/morph/mod.rs @@ -0,0 +1,10 @@ +//! Morph-specific `morph_` RPC namespace. +//! +//! Currently provides `morph_getTransactionHashesByReference` backed by the +//! persistent reference index. + +pub mod handler; +pub mod rpc; + +pub use handler::{MorphRpc, MorphRpcHandler}; +pub use rpc::{MorphRpcServer, ReferenceQueryArgs}; diff --git a/crates/rpc/src/morph/rpc.rs b/crates/rpc/src/morph/rpc.rs new file mode 100644 index 0000000..89e9d84 --- /dev/null +++ b/crates/rpc/src/morph/rpc.rs @@ -0,0 +1,42 @@ +//! 
jsonrpsee trait for the `morph_` RPC namespace. + +use alloy_primitives::B256; +use jsonrpsee::{core::RpcResult, proc_macros::rpc}; +use morph_reference_index::ReferenceTransactionResult; +use serde::{Deserialize, Serialize}; + +/// Parameters for `morph_getTransactionHashesByReference`. +/// +/// Hex-encoded quantities are accepted (`"0x0"`, `"0x64"`) for `offset`/`limit` +/// for geth compatibility; plain integers are also accepted. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ReferenceQueryArgs { + /// 32-byte Morph transaction reference key. + pub reference: B256, + /// Starting offset (default 0, max 10 000). + #[serde(default)] + pub offset: Option, + /// Maximum number of results (default 100, max 100). + #[serde(default)] + pub limit: Option, +} + +/// `morph_` RPC trait. +#[rpc(server, namespace = "morph")] +pub trait MorphRpc { + /// Return all MorphTx transactions carrying the given `reference` key, + /// ordered by (block_number, tx_index) ascending, with offset-based + /// pagination. + /// + /// Returns `-32000 "reference index initializing"` while the startup + /// backfill/reconcile has not yet completed. + /// + /// Returns `-32000 "reference index is behind"` when the index lags the + /// current chain tip by more than the configured threshold (default 16). 
+ #[method(name = "getTransactionHashesByReference")] + fn get_transaction_hashes_by_reference( + &self, + args: ReferenceQueryArgs, + ) -> RpcResult>; +} From 50654d2c70b8e9ece52b4e842c8e391bfa8d412d Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 16:05:44 +0800 Subject: [PATCH 03/15] chore(reference-index): fix clippy lints (collapsible_if, redundant_clone) --- crates/node/src/add_ons.rs | 2 +- crates/node/src/exex/reference_index.rs | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/crates/node/src/add_ons.rs b/crates/node/src/add_ons.rs index c1f9e24..098c368 100644 --- a/crates/node/src/add_ons.rs +++ b/crates/node/src/add_ons.rs @@ -154,7 +154,7 @@ where }); let morph_rpc_ctx = MorphRpc::new( - ReferenceIndexReader::new(control.db.clone(), DEFAULT_LAG_THRESHOLD), + ReferenceIndexReader::new(control.db, DEFAULT_LAG_THRESHOLD), provider.clone(), ); Some(MorphRpcHandler::new(morph_rpc_ctx)) diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index 15e2fd8..f6dc8a4 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -90,15 +90,13 @@ where tokio::select! { // Forward the startup FinishedHeight when Task A finishes. 
changed = startup_rx.changed() => { - if changed.is_ok() { - if let Some(block) = *startup_rx.borrow_and_update() { - debug!( - target: "morph::reference_index", - block_number = block.number, - "startup complete; sending initial FinishedHeight" - ); - ctx.events.send(ExExEvent::FinishedHeight(block))?; - } + if changed.is_ok() && let Some(block) = *startup_rx.borrow_and_update() { + debug!( + target: "morph::reference_index", + block_number = block.number, + "startup complete; sending initial FinishedHeight" + ); + ctx.events.send(ExExEvent::FinishedHeight(block))?; } } From 470123b9bbc4e2e6f484fc219e4787d8414fe9e8 Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 16:06:15 +0800 Subject: [PATCH 04/15] chore: cargo fmt --- bin/morph-reth/src/main.rs | 12 +++---- crates/node/src/add_ons.rs | 1 - crates/node/src/exex/mod.rs | 4 +-- crates/node/src/exex/reference_index.rs | 27 +++++++------- crates/reference-index/src/backfill.rs | 16 ++++----- crates/reference-index/src/db.rs | 48 +++++++++++++++---------- crates/reference-index/src/writer.rs | 14 ++++---- crates/rpc/src/morph/handler.rs | 6 +++- 8 files changed, 66 insertions(+), 62 deletions(-) diff --git a/bin/morph-reth/src/main.rs b/bin/morph-reth/src/main.rs index b416626..b99e418 100644 --- a/bin/morph-reth/src/main.rs +++ b/bin/morph-reth/src/main.rs @@ -53,8 +53,7 @@ fn main() { // can wire it into both the ExEx and the add-ons. 
let chain_spec = builder.config().chain.clone(); let datadir = builder.config().datadir(); - let reference_index_path = - datadir.data_dir().join("morph").join("reference_index"); + let reference_index_path = datadir.data_dir().join("morph").join("reference_index"); let chain_id = chain_spec.chain().id(); let genesis_hash = chain_spec.genesis_hash(); // from EthChainSpec trait @@ -74,12 +73,9 @@ fn main() { .with_types::() .with_components(node.components_builder()) .with_add_ons(MorphAddOns::new().with_reference_index(control)) - .install_exex( - "morph-reference-index", - async move |ctx| { - Ok(reference_index_exex(ctx, exex_control, startup_rx)) - }, - ) + .install_exex("morph-reference-index", async move |ctx| { + Ok(reference_index_exex(ctx, exex_control, startup_rx)) + }) .launch_with_debug_capabilities() .await?; diff --git a/crates/node/src/add_ons.rs b/crates/node/src/add_ons.rs index 098c368..0ce26ca 100644 --- a/crates/node/src/add_ons.rs +++ b/crates/node/src/add_ons.rs @@ -92,7 +92,6 @@ where } } - impl NodeAddOns for MorphAddOns where N: FullNodeComponents, diff --git a/crates/node/src/exex/mod.rs b/crates/node/src/exex/mod.rs index 045210c..b6e8c22 100644 --- a/crates/node/src/exex/mod.rs +++ b/crates/node/src/exex/mod.rs @@ -2,6 +2,4 @@ pub mod reference_index; -pub use reference_index::{ - ReferenceIndexControl, reference_index_exex, run_startup_indexing, -}; +pub use reference_index::{ReferenceIndexControl, reference_index_exex, run_startup_indexing}; diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index f6dc8a4..e6014d5 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -24,7 +24,9 @@ use morph_reference_index::{ use reth_db_api::transaction::DbTx; use reth_exex::{ExExContext, ExExEvent, ExExNotification}; use reth_node_api::{FullNodeComponents, NodeTypes}; -use reth_provider::{BlockHashReader, BlockReader, BlockNumReader, ChainSpecProvider, 
HeaderProvider}; +use reth_provider::{ + BlockHashReader, BlockNumReader, BlockReader, ChainSpecProvider, HeaderProvider, +}; use reth_storage_api::TransactionVariant; use tokio::sync::watch; use tokio_stream::StreamExt; @@ -74,10 +76,7 @@ pub async fn reference_index_exex( ) -> eyre::Result<()> where Node: FullNodeComponents< - Types: NodeTypes< - Primitives = MorphPrimitives, - ChainSpec = MorphChainSpec, - >, + Types: NodeTypes, >, Node::Provider: BlockReader + BlockNumReader @@ -168,7 +167,13 @@ where let block = provider .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? .ok_or_else(|| eyre::eyre!("missing block {number} during gap fill"))?; - write_block(&tx, block.number(), block.hash(), block.timestamp(), &block.body().transactions)?; + write_block( + &tx, + block.number(), + block.hash(), + block.timestamp(), + &block.body().transactions, + )?; } update_indexed_to(&tx, to)?; tx.commit()?; @@ -235,16 +240,10 @@ fn handle_notification( /// `FinishedHeight` through `control`. /// /// Call once from a spawned task after the node's provider is available. -pub fn run_startup_indexing( - node: &Node, - control: &ReferenceIndexControl, -) -> eyre::Result<()> +pub fn run_startup_indexing(node: &Node, control: &ReferenceIndexControl) -> eyre::Result<()> where Node: FullNodeComponents< - Types: NodeTypes< - Primitives = MorphPrimitives, - ChainSpec = MorphChainSpec, - >, + Types: NodeTypes, >, Node::Provider: BlockReader + BlockNumReader diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs index 71db21e..c6586a6 100644 --- a/crates/reference-index/src/backfill.rs +++ b/crates/reference-index/src/backfill.rs @@ -71,11 +71,9 @@ where // `lo` is now the first block whose timestamp >= jade_ts. // Verify that the previous block (if any) has timestamp < jade_ts. if lo > 0 { - let prev = provider - .header_by_number(lo - 1)? 
- .ok_or_else(|| { - ReferenceIndexError::Other(eyre::eyre!("missing header at {}", lo - 1)) - })?; + let prev = provider.header_by_number(lo - 1)?.ok_or_else(|| { + ReferenceIndexError::Other(eyre::eyre!("missing header at {}", lo - 1)) + })?; if prev.timestamp() >= jade_ts { // Shouldn't happen on a well-formed chain, but fall back to sentinel. return Ok(JADE_NOT_ACTIVE_SENTINEL); @@ -98,7 +96,9 @@ pub fn run_backfill( batch_size: u64, ) -> Result<(), ReferenceIndexError> where - P: BlockReader + BlockNumReader + HeaderProvider
, + P: BlockReader + + BlockNumReader + + HeaderProvider
, CS: MorphHardforks, { let state = db.backfill_state()?; @@ -139,9 +139,7 @@ where return Ok(()); } - let jade_first = db - .jade_first_block_number()? - .unwrap_or(start); + let jade_first = db.jade_first_block_number()?.unwrap_or(start); info!( target: "morph::reference_index", diff --git a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs index 36977e8..41dfd00 100644 --- a/crates/reference-index/src/db.rs +++ b/crates/reference-index/src/db.rs @@ -50,9 +50,11 @@ pub fn encode_u64(value: u64) -> MetaValue { } pub fn decode_u64(value: MetaValue) -> Result { - let bytes: [u8; 8] = value.0.as_slice().try_into().map_err(|_| { - ReferenceIndexError::Other(eyre::eyre!("invalid u64 metadata length")) - })?; + let bytes: [u8; 8] = value + .0 + .as_slice() + .try_into() + .map_err(|_| ReferenceIndexError::Other(eyre::eyre!("invalid u64 metadata length")))?; Ok(u64::from_be_bytes(bytes)) } @@ -61,9 +63,11 @@ pub fn encode_u32(value: u32) -> MetaValue { } pub fn decode_u32(value: MetaValue) -> Result { - let bytes: [u8; 4] = value.0.as_slice().try_into().map_err(|_| { - ReferenceIndexError::Other(eyre::eyre!("invalid u32 metadata length")) - })?; + let bytes: [u8; 4] = value + .0 + .as_slice() + .try_into() + .map_err(|_| ReferenceIndexError::Other(eyre::eyre!("invalid u32 metadata length")))?; Ok(u32::from_be_bytes(bytes)) } @@ -72,9 +76,11 @@ pub fn encode_b256(value: B256) -> MetaValue { } pub fn decode_b256(value: MetaValue) -> Result { - let bytes: [u8; 32] = value.0.as_slice().try_into().map_err(|_| { - ReferenceIndexError::Other(eyre::eyre!("invalid B256 metadata length")) - })?; + let bytes: [u8; 32] = value + .0 + .as_slice() + .try_into() + .map_err(|_| ReferenceIndexError::Other(eyre::eyre!("invalid B256 metadata length")))?; Ok(B256::new(bytes)) } @@ -105,7 +111,9 @@ impl ReferenceIndexDb { path, DatabaseArguments::new(reth_db::models::ClientVersion::default()), ) - .map_err(|e| ReferenceIndexError::Other(eyre::eyre!("failed to open reference 
index DB: {e}")))?; + .map_err(|e| { + ReferenceIndexError::Other(eyre::eyre!("failed to open reference index DB: {e}")) + })?; let this = Self { db: Arc::new(db), @@ -264,11 +272,8 @@ impl ReferenceIndexDb { /// Used for gap detection after startup. pub fn highest_block_reference_index(&self) -> Result, ReferenceIndexError> { let tx = self.tx()?; - let mut cursor = - tx.cursor_read::()?; - Ok(cursor - .last()? - .map(|(k, _)| k.block_number)) + let mut cursor = tx.cursor_read::()?; + Ok(cursor.last()?.map(|(k, _)| k.block_number)) } } @@ -297,16 +302,21 @@ mod tests { ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); // Re-open with wrong chain_id. let err = ReferenceIndexDb::open(dir.path(), 9999, B256::ZERO).unwrap_err(); - assert!(matches!(err, ReferenceIndexError::ChainIdentityMismatch("chain_id"))); + assert!(matches!( + err, + ReferenceIndexError::ChainIdentityMismatch("chain_id") + )); } #[test] fn open_rejects_mismatched_genesis_hash() { let dir = TempDir::new().unwrap(); ReferenceIndexDb::open(dir.path(), 2818, B256::ZERO).unwrap(); - let err = - ReferenceIndexDb::open(dir.path(), 2818, B256::repeat_byte(0xff)).unwrap_err(); - assert!(matches!(err, ReferenceIndexError::ChainIdentityMismatch("genesis_hash"))); + let err = ReferenceIndexDb::open(dir.path(), 2818, B256::repeat_byte(0xff)).unwrap_err(); + assert!(matches!( + err, + ReferenceIndexError::ChainIdentityMismatch("genesis_hash") + )); } #[test] diff --git a/crates/reference-index/src/writer.rs b/crates/reference-index/src/writer.rs index f275c41..3be7e54 100644 --- a/crates/reference-index/src/writer.rs +++ b/crates/reference-index/src/writer.rs @@ -7,8 +7,8 @@ use crate::{ db::{IndexMetaKey, encode_b256, encode_u64}, tables::{ - BlockHashValue, BlockReferenceIndex, BlockReferenceKey, BlockTimestampValue, - IndexMeta, IndexedBlockKey, IndexedBlocks, MetaValue, ReferenceIndex, ReferenceIndexKey, + BlockHashValue, BlockReferenceIndex, BlockReferenceKey, BlockTimestampValue, IndexMeta, + 
IndexedBlockKey, IndexedBlocks, MetaValue, ReferenceIndex, ReferenceIndexKey, ReferenceValue, }, types::{BackfillState, ReferenceIndexError}, @@ -69,10 +69,7 @@ pub fn write_block( /// Implements the reverse of [`write_block`]: reads every /// `BlockReferenceIndex` row for `block_number`, reconstructs each /// `ReferenceIndex` key, and removes entries from all three tables. -pub fn delete_block( - tx: &Tx, - block_number: u64, -) -> Result<(), ReferenceIndexError> { +pub fn delete_block(tx: &Tx, block_number: u64) -> Result<(), ReferenceIndexError> { // 1. Collect all BlockReferenceIndex rows for this block. let mut entries = Vec::new(); { @@ -245,7 +242,10 @@ mod tests { write_block(&tx, 7, B256::repeat_byte(0x11), 100, &[]).unwrap(); update_indexed_to(&tx, 7).unwrap(); tx.commit().unwrap(); - assert_eq!(db.indexed_block_hash(7).unwrap(), Some(B256::repeat_byte(0x11))); + assert_eq!( + db.indexed_block_hash(7).unwrap(), + Some(B256::repeat_byte(0x11)) + ); let tx = db.tx_mut().unwrap(); delete_block(&tx, 7).unwrap(); diff --git a/crates/rpc/src/morph/handler.rs b/crates/rpc/src/morph/handler.rs index d933867..0a84fe2 100644 --- a/crates/rpc/src/morph/handler.rs +++ b/crates/rpc/src/morph/handler.rs @@ -79,7 +79,11 @@ fn to_rpc_error(error: ReferenceIndexError) -> ErrorObjectOwned { ErrorObjectOwned::owned(-32000, "reference index is behind", None::<()>) } ReferenceIndexError::LimitTooLarge { .. } | ReferenceIndexError::OffsetTooLarge { .. 
} => { - ErrorObjectOwned::owned(ErrorCode::InvalidParams.code(), error.to_string(), None::<()>) + ErrorObjectOwned::owned( + ErrorCode::InvalidParams.code(), + error.to_string(), + None::<()>, + ) } other => ErrorObjectOwned::owned(-32000, other.to_string(), None::<()>), } From f5fff3b6bcd03b622091a6750a1c41172c0f5db4 Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 16:26:18 +0800 Subject: [PATCH 05/15] test(reference-index): add integration tests for backfill + query Three node-level integration tests verify the reference index storage layer: - finds single morph_tx by reference after backfill - paginates results across multiple blocks correctly - returns empty for unrelated reference keys Tests run backfill + reconcile directly against the node's provider (no ExEx required), which is sufficient to validate the query path. --- Cargo.lock | 1 + crates/node/Cargo.toml | 2 + crates/node/tests/it/main.rs | 1 + crates/node/tests/it/reference_index.rs | 178 ++++++++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 crates/node/tests/it/reference_index.rs diff --git a/Cargo.lock b/Cargo.lock index a5c6c3b..6729a54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4872,6 +4872,7 @@ dependencies = [ "reth-trie", "serde", "serde_json", + "tempfile", "tokio", "tokio-stream", "tracing", diff --git a/crates/node/Cargo.toml b/crates/node/Cargo.toml index 34ec2fe..e976f71 100644 --- a/crates/node/Cargo.toml +++ b/crates/node/Cargo.toml @@ -90,9 +90,11 @@ alloy-primitives.workspace = true alloy-rpc-types-engine.workspace = true alloy-rpc-types-eth.workspace = true jsonrpsee.workspace = true +morph-chainspec.workspace = true morph-payload-types.workspace = true morph-primitives.workspace = true serde_json.workspace = true +tempfile.workspace = true [[test]] name = "it" diff --git a/crates/node/tests/it/main.rs b/crates/node/tests/it/main.rs index 13f5577..fd66dea 100644 --- a/crates/node/tests/it/main.rs +++ b/crates/node/tests/it/main.rs @@ -13,6 +13,7 
@@ mod evm; mod hardfork; mod l1_messages; mod morph_tx; +mod reference_index; mod rpc; mod sync; mod txpool; diff --git a/crates/node/tests/it/reference_index.rs b/crates/node/tests/it/reference_index.rs new file mode 100644 index 0000000..42eeae9 --- /dev/null +++ b/crates/node/tests/it/reference_index.rs @@ -0,0 +1,178 @@ +//! Integration tests for `morph_getTransactionHashesByReference`. +//! +//! These tests spin up a real Morph test node, produce blocks with reference- +//! carrying `MorphTx` transactions, then run the reference index backfill + +//! reconcile against the node's provider, and verify the query results. +//! +//! Because `reth_e2e_test_utils::setup_engine` doesn't support ExEx injection, +//! we test the storage layer (backfill/reconcile/query) directly rather than +//! through the live ExEx. The ExEx logic is separately covered by the unit +//! tests in `morph-reference-index`. + +use alloy_consensus::transaction::TxHashRef; +use alloy_primitives::{B256, U64}; +use morph_node::test_utils::{MorphTxBuilder, TEST_TOKEN_ID, TestNodeBuilder}; +use morph_reference_index::{ + DEFAULT_LAG_THRESHOLD, DEFAULT_MAX_REORG_DEPTH, ReferenceIndexDb, ReferenceIndexReader, + ReferenceQuery, backfill::run_backfill, reconcile::run_startup_reconcile, +}; +use reth_payload_primitives::BuiltPayload; +use reth_provider::BlockNumReader; +use tempfile::TempDir; + +// ── helpers ─────────────────────────────────────────────────────────────────── + +async fn open_and_backfill_index

(provider: &P, dir: &TempDir) -> ReferenceIndexDb +where + P: reth_provider::BlockReader + + reth_provider::HeaderProvider

+ + BlockNumReader + + reth_provider::BlockHashReader + + reth_provider::ChainSpecProvider, +{ + let chain_spec = provider.chain_spec(); + let chain_id = reth_chainspec::EthChainSpec::chain(chain_spec.as_ref()).id(); + let genesis_hash = reth_chainspec::EthChainSpec::genesis_hash(chain_spec.as_ref()); + + let db = ReferenceIndexDb::open(dir.path(), chain_id, genesis_hash).unwrap(); + + let head = provider.best_block_number().unwrap(); + run_backfill(&db, provider, chain_spec.as_ref(), head, 256).unwrap(); + run_startup_reconcile(&db, provider, head, DEFAULT_MAX_REORG_DEPTH).unwrap(); + + db.set_ready(true); + db +} + +// ── tests ───────────────────────────────────────────────────────────────────── + +/// Produce one block with a reference-carrying MorphTx and verify the index +/// returns it for the correct reference. +#[tokio::test(flavor = "multi_thread")] +async fn reference_index_finds_single_morph_tx() -> eyre::Result<()> { + reth_tracing::init_test_tracing(); + + let (mut nodes, _tasks, wallet) = TestNodeBuilder::new().build().await?; + let mut node = nodes.pop().unwrap(); + + let reference = B256::with_last_byte(0x99); + let raw_tx = MorphTxBuilder::new(wallet.chain_id, wallet.inner.clone(), wallet.inner_nonce) + .with_v1_token_fee(TEST_TOKEN_ID) + .with_reference(reference) + .build_signed()?; + node.rpc.inject_tx(raw_tx).await?; + let payload = node.advance_block().await?; + let tx_hash = *payload + .block() + .body() + .transactions + .first() + .unwrap() + .tx_hash(); + + let dir = TempDir::new()?; + let db = open_and_backfill_index(&node.inner.provider, &dir).await; + + let reader = ReferenceIndexReader::new(db.clone(), DEFAULT_LAG_THRESHOLD); + let canonical_tip = node.inner.provider.best_block_number()?; + let query = ReferenceQuery::new(reference, None, None).unwrap(); + let results = reader.query(query, canonical_tip)?; + + assert_eq!(results.len(), 1, "should find exactly one transaction"); + assert_eq!(results[0].transaction_hash, tx_hash); + 
assert_eq!(results[0].transaction_index, U64::from(0u64)); + + Ok(()) +} + +/// Produce multiple blocks with the same reference and verify pagination. +#[tokio::test(flavor = "multi_thread")] +async fn reference_index_pagination() -> eyre::Result<()> { + reth_tracing::init_test_tracing(); + + let (mut nodes, _tasks, wallet) = TestNodeBuilder::new().build().await?; + let mut node = nodes.pop().unwrap(); + let reference = B256::with_last_byte(0xaa); + let mut tx_hashes = Vec::new(); + + for nonce in 0..3u64 { + let raw_tx = MorphTxBuilder::new( + wallet.chain_id, + wallet.inner.clone(), + wallet.inner_nonce + nonce, + ) + .with_v1_token_fee(TEST_TOKEN_ID) + .with_reference(reference) + .build_signed()?; + node.rpc.inject_tx(raw_tx).await?; + let payload = node.advance_block().await?; + tx_hashes.push( + *payload + .block() + .body() + .transactions + .first() + .unwrap() + .tx_hash(), + ); + } + + let dir = TempDir::new()?; + let db = open_and_backfill_index(&node.inner.provider, &dir).await; + + let reader = ReferenceIndexReader::new(db, DEFAULT_LAG_THRESHOLD); + let canonical_tip = node.inner.provider.best_block_number()?; + + // Page 1: offset=0, limit=2 → first two entries. + let page1 = reader.query( + ReferenceQuery::new(reference, Some(0), Some(2)).unwrap(), + canonical_tip, + )?; + assert_eq!(page1.len(), 2); + assert_eq!(page1[0].transaction_hash, tx_hashes[0]); + assert_eq!(page1[1].transaction_hash, tx_hashes[1]); + + // Page 2: offset=2, limit=2 → last entry only. + let page2 = reader.query( + ReferenceQuery::new(reference, Some(2), Some(2)).unwrap(), + canonical_tip, + )?; + assert_eq!(page2.len(), 1); + assert_eq!(page2[0].transaction_hash, tx_hashes[2]); + + Ok(()) +} + +/// A different reference key returns no results. 
+#[tokio::test(flavor = "multi_thread")] +async fn reference_index_no_results_for_unrelated_reference() -> eyre::Result<()> { + reth_tracing::init_test_tracing(); + + let (mut nodes, _tasks, wallet) = TestNodeBuilder::new().build().await?; + let mut node = nodes.pop().unwrap(); + let reference = B256::with_last_byte(0xbb); + let other_reference = B256::with_last_byte(0xcc); + + let raw_tx = MorphTxBuilder::new(wallet.chain_id, wallet.inner.clone(), wallet.inner_nonce) + .with_v1_token_fee(TEST_TOKEN_ID) + .with_reference(reference) + .build_signed()?; + node.rpc.inject_tx(raw_tx).await?; + node.advance_block().await?; + + let dir = TempDir::new()?; + let db = open_and_backfill_index(&node.inner.provider, &dir).await; + let reader = ReferenceIndexReader::new(db, DEFAULT_LAG_THRESHOLD); + let canonical_tip = node.inner.provider.best_block_number()?; + + let results = reader.query( + ReferenceQuery::new(other_reference, None, None).unwrap(), + canonical_tip, + )?; + assert!( + results.is_empty(), + "unrelated reference should return nothing" + ); + + Ok(()) +} From eb1f34b5a49606d6433394ab42c43dd320887859 Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 17:03:41 +0800 Subject: [PATCH 06/15] fix(reference-index): fix 5 adversarial review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finding 4 (Critical): NoHash → WithHash for all historical block reads - backfill.rs, reconcile.rs, exex/reference_index.rs fill_gap - reth docs say TransactionVariant::NoHash produces invalid tx hashes; write_block stores tx_hash in index keys, so NoHash caused silent data corruption in all historical indexing paths Finding 3 (Moderate): reconcile check_start clamped to indexed_from - sentinel path (jade not yet active): backfill sets indexed_to=head but writes no IndexedBlocks, so reconcile's scan would see None != canonical and trigger spurious fork_height at check_start - fix: take max(indexed_from, indexed_to - depth) as 
check_start Finding 1 (Critical): Task A startup failure upgraded to fatal - was: error! log and continue, leaving RPC stuck at "initializing" forever - now: panic propagated into spawn_critical → node shutdown on failure Finding 5 (Moderate): first-ready gap fill is now delete-then-write - fill_gap renamed to fill_gap_idempotent, deletes stale entries first - also handles ChainReverted notification as first is_ready notification Finding 8 (Nit): RPC method marked blocking for MDBX I/O - #[method(name = ..., blocking)] so jsonrpsee dispatches on blocking pool Also: clarify write_block doc — not idempotent, caller must delete_block first --- crates/node/src/add_ons.rs | 19 ++++++++------ crates/node/src/exex/reference_index.rs | 34 ++++++++++++++++++++----- crates/reference-index/src/backfill.rs | 5 +++- crates/reference-index/src/reconcile.rs | 10 ++++++-- crates/reference-index/src/writer.rs | 9 ++++--- crates/rpc/src/morph/rpc.rs | 4 ++- 6 files changed, 59 insertions(+), 22 deletions(-) diff --git a/crates/node/src/add_ons.rs b/crates/node/src/add_ons.rs index 0ce26ca..dc05e19 100644 --- a/crates/node/src/add_ons.rs +++ b/crates/node/src/add_ons.rs @@ -137,18 +137,21 @@ where let reference_rpc_handler = if let Some(control) = self.reference_index { let startup_control = control.clone(); let startup_node = ctx.node.clone(); + // spawn_critical causes node shutdown on panic/error, matching the spec + // requirement that reference index startup failures are fatal. 
task_executor.spawn_critical("morph reference index startup", async move { - if let Err(err) = tokio::task::spawn_blocking(move || { + let result = tokio::task::spawn_blocking(move || { crate::exex::run_startup_indexing(&startup_node, &startup_control) }) .await - .unwrap_or_else(|e| Err(eyre::eyre!("panic: {e}"))) - { - tracing::error!( - target: "morph::reference_index", - ?err, - "reference index startup indexing failed" - ); + .unwrap_or_else(|e| Err(eyre::eyre!("reference index startup panicked: {e}"))); + + match result { + Ok(()) => {} + Err(err) => { + // Propagate to spawn_critical which will shut down the node. + panic!("reference index startup failed: {err:?}"); + } } }); diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index e6014d5..be2846a 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -114,20 +114,34 @@ where continue; } - // Gap check on first is_ready notification. + // On the first is_ready notification: fill any gap that opened + // between when Task A finished reconcile and when this notification + // arrived. We delete-then-write to avoid stale entries in case a + // mini-reorg happened during that window. if first_ready { first_ready = false; + let indexed_to = control.db.indexed_to()?; if let Some(chain) = notification.committed_chain() { - let indexed_to = control.db.indexed_to()?; let notif_start = chain.first().number(); if notif_start > indexed_to + 1 { - fill_gap( + fill_gap_idempotent( &control.db, &ctx.components.provider().clone(), indexed_to + 1, notif_start - 1, )?; } + } else if let Some(old) = notification.reverted_chain() { + // Reorg during drain window: roll back below the revert start. 
+ let revert_start = old.first().number(); + if revert_start <= indexed_to { + fill_gap_idempotent( + &control.db, + &ctx.components.provider().clone(), + revert_start, + indexed_to, + )?; + } } } @@ -144,8 +158,11 @@ where Ok(()) } -/// Fill index entries from main DB for the gap `[from, to]`. -fn fill_gap( +/// Fill (or repair) index entries for blocks `[from, to]` using canonical main DB data. +/// +/// Uses delete-then-write per block to stay idempotent even if some entries +/// already exist (e.g. partial prior write or mini-reorg during drain window). +fn fill_gap_idempotent( db: &ReferenceIndexDb, provider: &Provider, from: u64, @@ -160,12 +177,15 @@ where info!( target: "morph::reference_index", from, to, - "filling gap between startup reconcile and first ExEx notification" + "idempotent gap fill between startup reconcile and first ExEx notification" ); let tx = db.tx_mut()?; for number in from..=to { + // Delete any stale entries before writing canonical ones. + delete_block(&tx, number)?; + // `WithHash` is required: write_block stores tx hashes in the index keys. let block = provider - .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? + .sealed_block_with_senders(number.into(), TransactionVariant::WithHash)? .ok_or_else(|| eyre::eyre!("missing block {number} during gap fill"))?; write_block( &tx, diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs index c6586a6..7bcb5fb 100644 --- a/crates/reference-index/src/backfill.rs +++ b/crates/reference-index/src/backfill.rs @@ -157,8 +157,11 @@ where let tx = db.tx_mut()?; for number in current..=batch_end { + // `WithHash` is required because write_block stores transaction hashes in the + // reference index keys. `NoHash` would leave tx hashes uninitialised per + // reth's BlockReader contract (see blockchain_provider.rs:309). let block = provider - .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? 
+ .sealed_block_with_senders(number.into(), TransactionVariant::WithHash)? .ok_or_else(|| { ReferenceIndexError::Other(eyre::eyre!( "missing block {number} during backfill" diff --git a/crates/reference-index/src/reconcile.rs b/crates/reference-index/src/reconcile.rs index 25981b0..70e240a 100644 --- a/crates/reference-index/src/reconcile.rs +++ b/crates/reference-index/src/reconcile.rs @@ -39,7 +39,12 @@ where let indexed_to = db.indexed_to()?; // ── Step A: canonical hash check ──────────────────────────────────────── - let check_start = indexed_to.saturating_sub(max_reorg_depth.saturating_sub(1)); + // Use indexed_from as lower bound to avoid scanning blocks that were never + // indexed (e.g. pre-Jade blocks on the sentinel path where backfill sets + // indexed_to but writes no IndexedBlocks entries). + let indexed_from = db.indexed_from()?.unwrap_or(indexed_to); + let depth_start = indexed_to.saturating_sub(max_reorg_depth.saturating_sub(1)); + let check_start = indexed_from.max(depth_start); let mut fork_height: Option = None; for number in check_start..=indexed_to { @@ -92,8 +97,9 @@ where let tx = db.tx_mut()?; for number in rebuild_start..=current_head { + // `WithHash` is required: we persist tx hashes as part of the index keys. let block = provider - .sealed_block_with_senders(number.into(), TransactionVariant::NoHash)? + .sealed_block_with_senders(number.into(), TransactionVariant::WithHash)? .ok_or_else(|| { ReferenceIndexError::Other(eyre::eyre!( "missing block {number} during reconcile" diff --git a/crates/reference-index/src/writer.rs b/crates/reference-index/src/writer.rs index 3be7e54..4156d62 100644 --- a/crates/reference-index/src/writer.rs +++ b/crates/reference-index/src/writer.rs @@ -20,9 +20,12 @@ use reth_db_api::{cursor::DbCursorRO, transaction::DbTxMut}; /// Index one canonical block into all three data tables. /// -/// Returns the number of reference entries written. 
This function is -/// idempotent: re-writing the same block overwrites previously stored rows -/// with the same canonical values. +/// Returns the number of reference entries written. +/// +/// **Not idempotent on its own**: the caller must call [`delete_block`] for +/// the same block number before re-writing to avoid leaving stale entries +/// (keys contain `tx_hash`, so a re-write with a different tx set would leave +/// old-tx ghost rows). Reconcile and reorg paths follow this contract. pub fn write_block( tx: &Tx, block_number: u64, diff --git a/crates/rpc/src/morph/rpc.rs b/crates/rpc/src/morph/rpc.rs index 89e9d84..368817f 100644 --- a/crates/rpc/src/morph/rpc.rs +++ b/crates/rpc/src/morph/rpc.rs @@ -34,7 +34,9 @@ pub trait MorphRpc { /// /// Returns `-32000 "reference index is behind"` when the index lags the /// current chain tip by more than the configured threshold (default 16). - #[method(name = "getTransactionHashesByReference")] + /// + /// Marked `blocking` because the handler runs synchronous MDBX reads. 
+ #[method(name = "getTransactionHashesByReference", blocking)] fn get_transaction_hashes_by_reference( &self, args: ReferenceQueryArgs, From 6ce077ac5b65ae312789f4a4dacff2bd06307e50 Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 17:16:29 +0800 Subject: [PATCH 07/15] fix(reference-index): fix 3 more confirmed bugs Finding F2 (High): InProgress resume clamped to jade_first_block_number - If crash happens between InProgress write and first batch commit, indexed_to is still 0; resume now uses max(indexed_to+1, jade_first) instead of plain indexed_to+1 to avoid re-indexing pre-Jade blocks Finding F3 (High): RPC offset/limit use alloy_serde::quantity::opt - geth sends hex-encoded quantities ("0x0", "0x64") on the wire; plain u64 serde deserializer rejected them silently - matches spec constraint #2: geth compatibility first Finding F5 (Medium): paired snapshot validation implemented - ReferenceIndexDb::validate_paired_snapshot(main_block_hash_fn) checks snapshot_block_hash/number against the main DB provider and returns descriptive errors on mismatch or ahead-of-DB cases - called from run_startup_indexing before backfill starts, after provider is available (matching spec timing requirement) --- crates/node/src/exex/reference_index.rs | 11 ++++++++ crates/reference-index/src/backfill.rs | 21 +++++++++++++-- crates/reference-index/src/db.rs | 35 +++++++++++++++++++++++++ crates/rpc/src/morph/rpc.rs | 8 +++--- 4 files changed, 69 insertions(+), 6 deletions(-) diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index be2846a..cd82155 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -275,6 +275,17 @@ where let chain_spec = provider.chain_spec(); let head = provider.best_block_number()?; + // Paired-snapshot validation: if snapshot_block_hash metadata is present, + // verify it against the main DB before doing any indexing work. 
Fails + // startup on mismatch per design spec. + control.db.validate_paired_snapshot(|number| { + provider.block_hash(number).map_err(|e| { + morph_reference_index::ReferenceIndexError::Other(eyre::eyre!( + "failed to read main DB block hash at {number}: {e}" + )) + }) + })?; + // Re-resolve jade sentinel if Jade has since activated. maybe_reset_jade_sentinel(&control.db, &provider, chain_spec.as_ref(), head)?; diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs index 7bcb5fb..11ee204 100644 --- a/crates/reference-index/src/backfill.rs +++ b/crates/reference-index/src/backfill.rs @@ -103,11 +103,28 @@ where { let state = db.backfill_state()?; + // `jade_first_block_number` is our canonical lower bound; it's written as the + // very first step on the NotStarted path. InProgress/Complete resumes read + // it back from IndexMeta. let start = match state { BackfillState::Complete => return Ok(()), BackfillState::InProgress => { - // Resume from last checkpoint + 1 (checkpoint is the last fully written block). - db.indexed_to()?.saturating_add(1) + // Resume from `max(indexed_to + 1, jade_first_block_number)`: if the + // crash happened between InProgress write and the first batch commit, + // `indexed_to` is still 0 and must not be used as-is. + let jade_first = db + .jade_first_block_number()? + .unwrap_or(JADE_NOT_ACTIVE_SENTINEL); + if jade_first == JADE_NOT_ACTIVE_SENTINEL { + // Sentinel case: no range to backfill; mark complete. 
+ let tx = db.tx_mut()?; + update_indexed_from(&tx, head_at_startup)?; + update_indexed_to(&tx, head_at_startup)?; + set_backfill_state(&tx, BackfillState::Complete)?; + tx.commit()?; + return Ok(()); + } + db.indexed_to()?.saturating_add(1).max(jade_first) } BackfillState::NotStarted => { let jade_first = resolve_jade_first_block(provider, chain_spec, head_at_startup)?; diff --git a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs index 41dfd00..ed05e99 100644 --- a/crates/reference-index/src/db.rs +++ b/crates/reference-index/src/db.rs @@ -250,6 +250,41 @@ impl ReferenceIndexDb { .transpose() } + /// Validate a paired-snapshot against the main chain provider. + /// + /// Must be called after `FullNode.provider()` is available. Fails startup + /// if `snapshot_block_number`/`snapshot_block_hash` are set but disagree + /// with the main chain (either the snapshot is ahead of the main DB, or + /// the hash at that height diverges). + /// + /// When snapshot metadata is not present (fresh DB, no paired snapshot), + /// this is a no-op. + pub fn validate_paired_snapshot(&self, main_block_hash: F) -> Result<(), ReferenceIndexError> + where + F: FnOnce(u64) -> Result, ReferenceIndexError>, + { + let Some(expected) = self.snapshot_block_hash()? else { + return Ok(()); + }; + let Some(number) = self.snapshot_block_number()? else { + // Hash without number is malformed state; treat as mismatch. + return Err(ReferenceIndexError::ChainIdentityMismatch( + "snapshot_block_hash present but snapshot_block_number missing", + )); + }; + + match main_block_hash(number)? 
{ + None => Err(ReferenceIndexError::Other(eyre::eyre!( + "reference index snapshot ahead of main DB (snapshot_block_number={number})" + ))), + Some(actual) if actual != expected => Err(ReferenceIndexError::Other(eyre::eyre!( + "reference index snapshot chain mismatch at block {number}: \ + expected {expected:?}, main DB has {actual:?}" + ))), + Some(_) => Ok(()), + } + } + /// Returns the canonical block hash stored in `IndexedBlocks` for `block_number`. pub fn indexed_block_hash( &self, diff --git a/crates/rpc/src/morph/rpc.rs b/crates/rpc/src/morph/rpc.rs index 368817f..932fe22 100644 --- a/crates/rpc/src/morph/rpc.rs +++ b/crates/rpc/src/morph/rpc.rs @@ -7,18 +7,18 @@ use serde::{Deserialize, Serialize}; /// Parameters for `morph_getTransactionHashesByReference`. /// -/// Hex-encoded quantities are accepted (`"0x0"`, `"0x64"`) for `offset`/`limit` -/// for geth compatibility; plain integers are also accepted. +/// `offset`/`limit` use the geth `hexutil.Uint64` wire format (hex-encoded +/// quantity strings like `"0x0"`, `"0x64"`). Plain integers are also accepted. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct ReferenceQueryArgs { /// 32-byte Morph transaction reference key. pub reference: B256, /// Starting offset (default 0, max 10 000). - #[serde(default)] + #[serde(default, with = "alloy_serde::quantity::opt")] pub offset: Option, /// Maximum number of results (default 100, max 100). 
- #[serde(default)] + #[serde(default, with = "alloy_serde::quantity::opt")] pub limit: Option, } From d3ab7e4ee70a26cff3697c9fbd0a8cf5f0307445 Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 17:27:30 +0800 Subject: [PATCH 08/15] fix(reference-index): first-ready ChainReverted must fill gap up to parent Scenario: - reconcile finishes with indexed_to = H - is_ready=false drain window: ExEx drains ChainCommitted{H+1}, {H+2} but writes nothing - is_ready=true, first_ready=true - first notification is ChainReverted { old: [H+2] } - revert_start = H+2, parent = H+1 Old code: first_ready branch only filled when revert_start <= indexed_to (false here), so nothing happened. handle_notification then ran delete_block(H+2) as a no-op and set indexed_to = parent = H+1 -- but H+1 had never been written. Result: indexed_to is ahead of what is actually indexed, future reconcile sees phantom hash mismatch on H+1. Fix: In first_ready ChainReverted branch, compute parent = revert_start - 1. If parent > indexed_to, drain window committed canonical blocks (indexed_to+1..=parent) that will SURVIVE this revert; backfill them now via fill_gap_idempotent. handle_notification then rolls back to parent consistently. parent <= indexed_to case (revert overlaps already-indexed range) is already correct without special handling. --- crates/node/src/exex/reference_index.rs | 29 ++++++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index cd82155..9da9143 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -116,12 +116,20 @@ where // On the first is_ready notification: fill any gap that opened // between when Task A finished reconcile and when this notification - // arrived. We delete-then-write to avoid stale entries in case a - // mini-reorg happened during that window. + // arrived. 
The goal is that after this branch runs, the index + // covers exactly the canonical range that is not going to be + // touched by the upcoming handle_notification call. + // + // fill_gap_idempotent delete-then-writes so stale entries from + // possible mini-reorgs during the drain window are cleaned up. if first_ready { first_ready = false; let indexed_to = control.db.indexed_to()?; if let Some(chain) = notification.committed_chain() { + // ChainCommitted / ChainReorged: blocks to index start at + // chain.first(). Anything in (indexed_to, notif_start-1] + // must be backfilled from main DB so handle_notification + // can pick up from notif_start. let notif_start = chain.first().number(); if notif_start > indexed_to + 1 { fill_gap_idempotent( @@ -132,16 +140,25 @@ where )?; } } else if let Some(old) = notification.reverted_chain() { - // Reorg during drain window: roll back below the revert start. + // ChainReverted: handle_notification will delete old.blocks + // and set indexed_to = parent (= revert_start - 1). + // If parent > indexed_to, the drain window committed and + // never wrote canonical blocks (indexed_to+1..=parent) that + // will SURVIVE this revert. We must fill them now so the + // post-revert indexed_to = parent is consistent. let revert_start = old.first().number(); - if revert_start <= indexed_to { + let parent = revert_start.saturating_sub(1); + if parent > indexed_to { fill_gap_idempotent( &control.db, &ctx.components.provider().clone(), - revert_start, - indexed_to, + indexed_to + 1, + parent, )?; } + // parent <= indexed_to: revert overlaps already-indexed + // range; handle_notification's delete_block + rollback of + // indexed_to takes care of it, no backfill needed. 
} } From 6237a471d2f08f080eb6060ca4fd2bcbcfac5cad Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 17:45:33 +0800 Subject: [PATCH 09/15] =?UTF-8?q?fix(reference-index):=20final=20sweep=20?= =?UTF-8?q?=E2=80=94=20reconcile=20None=20skip=20+=20monotonic=20FinishedH?= =?UTF-8?q?eight?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finding 1 (High): reconcile treated indexed_hash=None as hash mismatch The sentinel path (Jade not yet activated) sets indexed_to=head but writes no IndexedBlocks entries. reconcile then scanned [indexed_from, indexed_to], found None != canonical_hash, and falsely triggered reorg rebuild for pre-Jade blocks. Fix: skip blocks where IndexedBlocks has no entry (None means "never indexed", not "hash mismatch"). Note: indexed_from.max(depth_start) from an earlier round correctly narrows the scan range but did NOT prevent the None comparison since the range still included indexed_to itself in the sentinel case. Finding 2 (Medium): startup FinishedHeight could regress past live commits tokio::select! is non-deterministic: if a ChainCommitted notification is processed before the startup watch channel fires, ExExEvent::FinishedHeight would be sent at H+k then regressed to H (the startup indexed_to). Fix: introduce send_finished_height_monotonic() that only sends when the new height strictly exceeds last_finished; both the startup watch arm and ChainCommitted/ChainReorged share this helper. ChainReorged is the one permitted exception (reth ExEx docs allow height to go down on reorgs). 
--- Cargo.lock | 1 - crates/node/src/exex/reference_index.rs | 46 ++++++++++++++++++++----- crates/reference-index/Cargo.toml | 1 - crates/reference-index/src/backfill.rs | 5 --- crates/reference-index/src/db.rs | 1 - crates/reference-index/src/lib.rs | 1 - crates/reference-index/src/metrics.rs | 19 ---------- crates/reference-index/src/reader.rs | 2 -- crates/reference-index/src/reconcile.rs | 14 ++++++-- crates/reference-index/src/writer.rs | 4 --- 10 files changed, 49 insertions(+), 45 deletions(-) delete mode 100644 crates/reference-index/src/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 6729a54..3fcf443 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4959,7 +4959,6 @@ dependencies = [ "alloy-primitives", "alloy-rlp", "eyre", - "metrics", "morph-chainspec", "morph-primitives", "reth-chainspec", diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index 9da9143..a0b3d4c 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -84,6 +84,11 @@ where + BlockHashReader, { let mut first_ready = true; + // Track last sent FinishedHeight to keep progress monotonic. Without this, + // the `tokio::select!` arms can race: a live commit may send `FinishedHeight(H+k)` + // before the startup watch channel is observed, which would otherwise + // regress reth's pruning marker back to `H`. + let mut last_finished: Option = None; loop { tokio::select! 
{ @@ -93,9 +98,9 @@ where debug!( target: "morph::reference_index", block_number = block.number, - "startup complete; sending initial FinishedHeight" + "startup complete; forwarding initial FinishedHeight" ); - ctx.events.send(ExExEvent::FinishedHeight(block))?; + send_finished_height_monotonic(&ctx.events, block, &mut last_finished)?; } } @@ -162,11 +167,12 @@ where } } - if let Err(e) = - handle_notification(&ctx.events, &control.db, notification) - { - error!(target: "morph::reference_index", ?e, "error processing notification"); - return Err(e); + match handle_notification(&ctx.events, &control.db, notification, &mut last_finished) { + Ok(()) => {} + Err(e) => { + error!(target: "morph::reference_index", ?e, "error processing notification"); + return Err(e); + } } } } @@ -218,10 +224,14 @@ where } /// Process one ExEx notification: commit or revert three tables atomically. +/// +/// Updates `last_finished` when a FinishedHeight is sent so the caller can +/// keep progress monotonic across the startup-watch and notification arms. fn handle_notification( events: &tokio::sync::mpsc::UnboundedSender, db: &ReferenceIndexDb, notification: ExExNotification, + last_finished: &mut Option, ) -> eyre::Result<()> { match notification { ExExNotification::ChainCommitted { new } => { @@ -237,7 +247,7 @@ fn handle_notification( } update_indexed_to(&tx, new.tip().number())?; tx.commit()?; - events.send(ExExEvent::FinishedHeight(new.tip().num_hash()))?; + send_finished_height_monotonic(events, new.tip().num_hash(), last_finished)?; } ExExNotification::ChainReverted { old } => { let parent = old.first().number().saturating_sub(1); @@ -265,12 +275,32 @@ fn handle_notification( } update_indexed_to(&tx, new.tip().number())?; tx.commit()?; + // On reorg to a shorter chain the tip may go down; that is the one + // case we ALLOW FinishedHeight to regress (reth's ExExEvent doc + // explicitly permits "on reorgs, height may go down"). 
events.send(ExExEvent::FinishedHeight(new.tip().num_hash()))?; + *last_finished = Some(new.tip().num_hash()); } } Ok(()) } +/// Send a FinishedHeight only if it strictly advances `last_finished`. +/// +/// Used for ChainCommitted and the startup watch channel, where progress +/// must only go forward. ChainReorged has its own allow-regress path. +fn send_finished_height_monotonic( + events: &tokio::sync::mpsc::UnboundedSender, + block: BlockNumHash, + last_finished: &mut Option, +) -> eyre::Result<()> { + if last_finished.is_none_or(|last| block.number > last.number) { + events.send(ExExEvent::FinishedHeight(block))?; + *last_finished = Some(block); + } + Ok(()) +} + // ── Task A: startup indexing ────────────────────────────────────────────────── /// Execute backfill → reconcile, set `is_ready = true`, then send the startup diff --git a/crates/reference-index/Cargo.toml b/crates/reference-index/Cargo.toml index 54ea6d4..fd2cc78 100644 --- a/crates/reference-index/Cargo.toml +++ b/crates/reference-index/Cargo.toml @@ -14,7 +14,6 @@ alloy-consensus.workspace = true alloy-primitives.workspace = true alloy-rlp.workspace = true eyre.workspace = true -metrics.workspace = true morph-chainspec.workspace = true morph-primitives = { workspace = true, features = ["reth-codec"] } reth-chainspec.workspace = true diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs index 11ee204..18c3000 100644 --- a/crates/reference-index/src/backfill.rs +++ b/crates/reference-index/src/backfill.rs @@ -164,8 +164,6 @@ where "starting reference index backfill" ); - let total = head_at_startup.saturating_sub(start).saturating_add(1) as f64; - let mut done = 0f64; let mut current = start; while current <= head_at_startup { @@ -206,9 +204,6 @@ where } tx.commit()?; - done += (batch_end - current + 1) as f64; - crate::metrics::set_backfill_progress(done / total); - debug!( target: "morph::reference_index", batch_start = current, diff --git 
a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs index ed05e99..a2c5fae 100644 --- a/crates/reference-index/src/db.rs +++ b/crates/reference-index/src/db.rs @@ -191,7 +191,6 @@ impl ReferenceIndexDb { pub fn set_ready(&self, ready: bool) { self.ready.store(ready, Ordering::Release); - crate::metrics::set_ready(ready); } // ── transaction factory ─────────────────────────────────────────────────── diff --git a/crates/reference-index/src/lib.rs b/crates/reference-index/src/lib.rs index caa4604..692cfce 100644 --- a/crates/reference-index/src/lib.rs +++ b/crates/reference-index/src/lib.rs @@ -15,7 +15,6 @@ use tracing as _; pub mod backfill; pub mod db; -pub mod metrics; pub mod reader; pub mod reconcile; pub mod tables; diff --git a/crates/reference-index/src/metrics.rs b/crates/reference-index/src/metrics.rs deleted file mode 100644 index 4a2dc90..0000000 --- a/crates/reference-index/src/metrics.rs +++ /dev/null @@ -1,19 +0,0 @@ -pub fn set_ready(ready: bool) { - metrics::gauge!("morph_reference_index_ready").set(if ready { 1.0 } else { 0.0 }); -} - -pub fn set_lag_blocks(lag: u64) { - metrics::gauge!("morph_reference_index_lag_blocks").set(lag as f64); -} - -pub fn set_backfill_progress(progress: f64) { - metrics::gauge!("morph_reference_index_backfill_progress").set(progress.clamp(0.0, 1.0)); -} - -pub fn set_backfill_state(state: u8) { - metrics::gauge!("morph_reference_index_backfill_state").set(state as f64); -} - -pub fn increment_entries(count: u64) { - metrics::counter!("morph_reference_index_entries_total").increment(count); -} diff --git a/crates/reference-index/src/reader.rs b/crates/reference-index/src/reader.rs index 6414a27..2fd0338 100644 --- a/crates/reference-index/src/reader.rs +++ b/crates/reference-index/src/reader.rs @@ -41,7 +41,6 @@ impl ReferenceIndexReader { let indexed_to = self.db.indexed_to()?; if canonical_tip.saturating_sub(indexed_to) > self.lag_threshold { - 
crate::metrics::set_lag_blocks(canonical_tip.saturating_sub(indexed_to)); return Err(ReferenceIndexError::IndexBehind); } @@ -79,7 +78,6 @@ impl ReferenceIndexReader { next = cursor.next()?; } - crate::metrics::set_lag_blocks(canonical_tip.saturating_sub(indexed_to)); Ok(results) } } diff --git a/crates/reference-index/src/reconcile.rs b/crates/reference-index/src/reconcile.rs index 70e240a..e24e567 100644 --- a/crates/reference-index/src/reconcile.rs +++ b/crates/reference-index/src/reconcile.rs @@ -49,13 +49,21 @@ where for number in check_start..=indexed_to { let indexed_hash = db.indexed_block_hash(number)?; - let canonical_hash = provider.block_hash(number)?; - if indexed_hash != canonical_hash { + // `None` means the block was never written to IndexedBlocks (e.g. the + // sentinel path where backfill marks indexed_to = head but writes no + // IndexedBlocks entries for pre-Jade blocks). Treat as "not indexed, + // skip" rather than a hash mismatch to avoid spurious reorg detection. 
+ let Some(indexed) = indexed_hash else { + continue; + }; + + let canonical_hash = provider.block_hash(number)?; + if Some(indexed) != canonical_hash { debug!( target: "morph::reference_index", number, - ?indexed_hash, + ?indexed, ?canonical_hash, "canonical hash mismatch during reconcile" ); diff --git a/crates/reference-index/src/writer.rs b/crates/reference-index/src/writer.rs index 4156d62..e795b8d 100644 --- a/crates/reference-index/src/writer.rs +++ b/crates/reference-index/src/writer.rs @@ -61,9 +61,6 @@ pub fn write_block( written += 1; } - if written > 0 { - crate::metrics::increment_entries(written); - } Ok(written) } @@ -148,7 +145,6 @@ pub fn set_backfill_state( IndexMetaKey::BackfillState.into(), MetaValue(vec![state as u8]), )?; - crate::metrics::set_backfill_state(state as u8); Ok(()) } From 4afc28678ec3e981de45ea4fe7d6fb9ed89bc67a Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 30 Apr 2026 18:08:38 +0800 Subject: [PATCH 10/15] fix(reference-index): InProgress+sentinel re-resolves Jade; RPC internal error scrub Finding 1 (Critical): InProgress + sentinel crash-recovery skips Jade history If node crashes between writing InProgress+SENTINEL (txn 1) and writing Complete (txn 2), the next restart had BackfillState::InProgress and jade_first_block_number=SENTINEL. The previous fix for InProgress resume only avoided starting from block 1, but the sentinel fast-path inside the InProgress branch still blindly marked Complete without re-resolving Jade. Fix: when InProgress+SENTINEL, re-run resolve_jade_first_block against the current head. If Jade is now active, persist the real jade_first_block_number and continue backfill from there. Only take the immediate-complete shortcut when Jade is still not active on the current head. Finding 2 (Moderate): to_rpc_error leaked internal Database/Provider/Other error strings verbatim to RPC callers. The spec only documents two state-gating responses; anything else is an internal failure. 
Fix: map Database/Provider/Other to a fixed -32603 "internal reference index error" message; log the full error internally with tracing::error!. --- crates/reference-index/src/backfill.rs | 28 ++++++++++++++++++-------- crates/rpc/src/morph/handler.rs | 16 ++++++++++++++- 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs index 18c3000..2428db1 100644 --- a/crates/reference-index/src/backfill.rs +++ b/crates/reference-index/src/backfill.rs @@ -112,19 +112,31 @@ where // Resume from `max(indexed_to + 1, jade_first_block_number)`: if the // crash happened between InProgress write and the first batch commit, // `indexed_to` is still 0 and must not be used as-is. - let jade_first = db + let jade_first_stored = db .jade_first_block_number()? .unwrap_or(JADE_NOT_ACTIVE_SENTINEL); - if jade_first == JADE_NOT_ACTIVE_SENTINEL { - // Sentinel case: no range to backfill; mark complete. + if jade_first_stored == JADE_NOT_ACTIVE_SENTINEL { + // Previously sentinel; crash may have happened before the follow-up + // txn that would have marked Complete. Re-resolve against the current + // head: Jade might have activated in the meantime. + let new_jade = resolve_jade_first_block(provider, chain_spec, head_at_startup)?; + if new_jade == JADE_NOT_ACTIVE_SENTINEL { + // Still not active; safe to mark complete immediately. + let tx = db.tx_mut()?; + update_indexed_from(&tx, head_at_startup)?; + update_indexed_to(&tx, head_at_startup)?; + set_backfill_state(&tx, BackfillState::Complete)?; + tx.commit()?; + return Ok(()); + } + // Jade now active; persist the real first block and continue backfill. 
let tx = db.tx_mut()?; - update_indexed_from(&tx, head_at_startup)?; - update_indexed_to(&tx, head_at_startup)?; - set_backfill_state(&tx, BackfillState::Complete)?; + set_jade_first_block_number(&tx, new_jade)?; tx.commit()?; - return Ok(()); + db.indexed_to()?.saturating_add(1).max(new_jade) + } else { + db.indexed_to()?.saturating_add(1).max(jade_first_stored) } - db.indexed_to()?.saturating_add(1).max(jade_first) } BackfillState::NotStarted => { let jade_first = resolve_jade_first_block(provider, chain_spec, head_at_startup)?; diff --git a/crates/rpc/src/morph/handler.rs b/crates/rpc/src/morph/handler.rs index 0a84fe2..3f32df3 100644 --- a/crates/rpc/src/morph/handler.rs +++ b/crates/rpc/src/morph/handler.rs @@ -9,6 +9,7 @@ use morph_reference_index::{ ReferenceIndexError, ReferenceIndexReader, ReferenceQuery, ReferenceTransactionResult, }; use reth_storage_api::BlockNumReader; +use tracing; // ── Context ────────────────────────────────────────────────────────────────── @@ -85,6 +86,19 @@ fn to_rpc_error(error: ReferenceIndexError) -> ErrorObjectOwned { None::<()>, ) } - other => ErrorObjectOwned::owned(-32000, other.to_string(), None::<()>), + // Log internal details for operators but return a generic message on + // the wire so Database/Provider/Other error strings don't leak. 
+ other => { + tracing::error!( + target: "morph::reference_index_rpc", + error = %other, + "reference index internal error" + ); + ErrorObjectOwned::owned( + ErrorCode::InternalError.code(), + "internal reference index error", + None::<()>, + ) + } } } From b6aa870b2c7a5c9642068ec8ed75ebc85e592e9a Mon Sep 17 00:00:00 2001 From: panos Date: Wed, 6 May 2026 16:40:45 +0800 Subject: [PATCH 11/15] chore(reference-index): remove unused paired-snapshot validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SnapshotBlockNumber/SnapshotBlockHash metadata keys and validate_paired_snapshot() had no write path — ops tar-compress the datadir directly so these fields were always empty and the check was always a no-op. chain_id + genesis_hash at open() plus reconcile's canonical hash check already cover the mismatch cases that matter. --- crates/node/src/exex/reference_index.rs | 11 ------ crates/reference-index/src/db.rs | 51 ------------------------- crates/reference-index/src/writer.rs | 17 +-------- 3 files changed, 1 insertion(+), 78 deletions(-) diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index a0b3d4c..c300c69 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -322,17 +322,6 @@ where let chain_spec = provider.chain_spec(); let head = provider.best_block_number()?; - // Paired-snapshot validation: if snapshot_block_hash metadata is present, - // verify it against the main DB before doing any indexing work. Fails - // startup on mismatch per design spec. - control.db.validate_paired_snapshot(|number| { - provider.block_hash(number).map_err(|e| { - morph_reference_index::ReferenceIndexError::Other(eyre::eyre!( - "failed to read main DB block hash at {number}: {e}" - )) - }) - })?; - // Re-resolve jade sentinel if Jade has since activated. 
maybe_reset_jade_sentinel(&control.db, &provider, chain_spec.as_ref(), head)?; diff --git a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs index a2c5fae..65d715c 100644 --- a/crates/reference-index/src/db.rs +++ b/crates/reference-index/src/db.rs @@ -33,8 +33,6 @@ pub enum IndexMetaKey { GenesisHash = 6, SchemaVersion = 7, JadeFirstBlockNumber = 8, - SnapshotBlockNumber = 9, - SnapshotBlockHash = 10, } impl From for MetaKey { @@ -235,55 +233,6 @@ impl ReferenceIndexDb { .transpose() } - pub fn snapshot_block_number(&self) -> Result, ReferenceIndexError> { - let tx = self.tx()?; - tx.get::(IndexMetaKey::SnapshotBlockNumber.into())? - .map(decode_u64) - .transpose() - } - - pub fn snapshot_block_hash(&self) -> Result, ReferenceIndexError> { - let tx = self.tx()?; - tx.get::(IndexMetaKey::SnapshotBlockHash.into())? - .map(decode_b256) - .transpose() - } - - /// Validate a paired-snapshot against the main chain provider. - /// - /// Must be called after `FullNode.provider()` is available. Fails startup - /// if `snapshot_block_number`/`snapshot_block_hash` are set but disagree - /// with the main chain (either the snapshot is ahead of the main DB, or - /// the hash at that height diverges). - /// - /// When snapshot metadata is not present (fresh DB, no paired snapshot), - /// this is a no-op. - pub fn validate_paired_snapshot(&self, main_block_hash: F) -> Result<(), ReferenceIndexError> - where - F: FnOnce(u64) -> Result, ReferenceIndexError>, - { - let Some(expected) = self.snapshot_block_hash()? else { - return Ok(()); - }; - let Some(number) = self.snapshot_block_number()? else { - // Hash without number is malformed state; treat as mismatch. - return Err(ReferenceIndexError::ChainIdentityMismatch( - "snapshot_block_hash present but snapshot_block_number missing", - )); - }; - - match main_block_hash(number)? 
{ - None => Err(ReferenceIndexError::Other(eyre::eyre!( - "reference index snapshot ahead of main DB (snapshot_block_number={number})" - ))), - Some(actual) if actual != expected => Err(ReferenceIndexError::Other(eyre::eyre!( - "reference index snapshot chain mismatch at block {number}: \ - expected {expected:?}, main DB has {actual:?}" - ))), - Some(_) => Ok(()), - } - } - /// Returns the canonical block hash stored in `IndexedBlocks` for `block_number`. pub fn indexed_block_hash( &self, diff --git a/crates/reference-index/src/writer.rs b/crates/reference-index/src/writer.rs index e795b8d..5e163bd 100644 --- a/crates/reference-index/src/writer.rs +++ b/crates/reference-index/src/writer.rs @@ -5,7 +5,7 @@ //! single atomic commit. use crate::{ - db::{IndexMetaKey, encode_b256, encode_u64}, + db::{IndexMetaKey, encode_u64}, tables::{ BlockHashValue, BlockReferenceIndex, BlockReferenceKey, BlockTimestampValue, IndexMeta, IndexedBlockKey, IndexedBlocks, MetaValue, ReferenceIndex, ReferenceIndexKey, @@ -159,21 +159,6 @@ pub fn set_jade_first_block_number( Ok(()) } -pub fn set_snapshot_block( - tx: &Tx, - block_number: u64, - block_hash: B256, -) -> Result<(), ReferenceIndexError> { - tx.put::( - IndexMetaKey::SnapshotBlockNumber.into(), - encode_u64(block_number), - )?; - tx.put::( - IndexMetaKey::SnapshotBlockHash.into(), - encode_b256(block_hash), - )?; - Ok(()) -} #[cfg(test)] mod tests { From eb42e6c971d2f78254a934c3bf9cad42eb20464a Mon Sep 17 00:00:00 2001 From: panos Date: Wed, 6 May 2026 17:07:49 +0800 Subject: [PATCH 12/15] chore(reference-index): remove unused pub methods and dead imports - ReferenceIndexDb: drop highest_indexed_block / highest_block_reference_index (no callers in the workspace) - ReferenceIndexDb: remove unused DbCursorRO import exposed by the deletion - ReferenceIndexReader: drop db() accessor (no callers in the workspace) --- crates/reference-index/src/db.rs | 15 --------------- crates/reference-index/src/reader.rs | 4 ---- 2 files changed, 
19 deletions(-) diff --git a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs index 65d715c..664731c 100644 --- a/crates/reference-index/src/db.rs +++ b/crates/reference-index/src/db.rs @@ -8,7 +8,6 @@ use alloy_primitives::B256; use reth_db::{DatabaseEnv, mdbx::DatabaseArguments}; use reth_db_api::{ Database, - cursor::DbCursorRO, transaction::{DbTx, DbTxMut}, }; use std::{ @@ -244,20 +243,6 @@ impl ReferenceIndexDb { .map(|v| v.0)) } - /// Returns the highest block number recorded in `IndexedBlocks`. - pub fn highest_indexed_block(&self) -> Result, ReferenceIndexError> { - let tx = self.tx()?; - let mut cursor = tx.cursor_read::()?; - Ok(cursor.last()?.map(|(k, _)| k.block_number)) - } - - /// Returns the highest block number recorded in `BlockReferenceIndex`. - /// Used for gap detection after startup. - pub fn highest_block_reference_index(&self) -> Result, ReferenceIndexError> { - let tx = self.tx()?; - let mut cursor = tx.cursor_read::()?; - Ok(cursor.last()?.map(|(k, _)| k.block_number)) - } } #[cfg(test)] diff --git a/crates/reference-index/src/reader.rs b/crates/reference-index/src/reader.rs index 2fd0338..efe6dc7 100644 --- a/crates/reference-index/src/reader.rs +++ b/crates/reference-index/src/reader.rs @@ -23,10 +23,6 @@ impl ReferenceIndexReader { Self { db, lag_threshold } } - pub fn db(&self) -> &ReferenceIndexDb { - &self.db - } - /// Execute a paginated reference query. /// /// `canonical_tip` is the current best block number, used to compute lag. 
From 3a2f9776b0c2b66d3a4c2a8b747fe7ebd397e24d Mon Sep 17 00:00:00 2001 From: panos Date: Thu, 7 May 2026 08:43:24 +0800 Subject: [PATCH 13/15] chore(reference-index): simplify tracing targets, drop sleep, clean up error wrapping - Extract const TARGET in backfill, reconcile, exex and handler so the tracing target string is defined once per file and easy to grep - Remove the 10ms inter-batch sleep in run_backfill; on a 2.6M-block backfill (256-block batches) this was ~100s of pure idle time with no benefit - Simplify best_block_number error path in MorphRpcHandler: use the existing From impl instead of wrapping with eyre::eyre! --- crates/node/src/exex/reference_index.rs | 12 +++++++----- crates/reference-index/src/backfill.rs | 14 ++++++-------- crates/reference-index/src/db.rs | 1 - crates/reference-index/src/reconcile.rs | 8 +++++--- crates/reference-index/src/writer.rs | 1 - crates/rpc/src/morph/handler.rs | 7 +++++-- 6 files changed, 23 insertions(+), 20 deletions(-) diff --git a/crates/node/src/exex/reference_index.rs b/crates/node/src/exex/reference_index.rs index c300c69..9067d33 100644 --- a/crates/node/src/exex/reference_index.rs +++ b/crates/node/src/exex/reference_index.rs @@ -32,6 +32,8 @@ use tokio::sync::watch; use tokio_stream::StreamExt; use tracing::{debug, error, info}; +const TARGET: &str = "morph::reference_index"; + // ── shared control ──────────────────────────────────────────────────────────── /// Shared handle that connects Task A (startup indexing) with Task B (ExEx). @@ -96,7 +98,7 @@ where changed = startup_rx.changed() => { if changed.is_ok() && let Some(block) = *startup_rx.borrow_and_update() { debug!( - target: "morph::reference_index", + target: TARGET, block_number = block.number, "startup complete; forwarding initial FinishedHeight" ); @@ -111,7 +113,7 @@ where // Drain without writing to avoid backpressure. 
if let Some(chain) = notification.committed_chain() { debug!( - target: "morph::reference_index", + target: TARGET, tip = chain.tip().number(), "drained notification while index initializing" ); @@ -170,7 +172,7 @@ where match handle_notification(&ctx.events, &control.db, notification, &mut last_finished) { Ok(()) => {} Err(e) => { - error!(target: "morph::reference_index", ?e, "error processing notification"); + error!(target: TARGET, ?e, "error processing notification"); return Err(e); } } @@ -198,7 +200,7 @@ where return Ok(()); } info!( - target: "morph::reference_index", + target: TARGET, from, to, "idempotent gap fill between startup reconcile and first ExEx notification" ); @@ -359,7 +361,7 @@ where })?; info!( - target: "morph::reference_index", + target: TARGET, indexed_to, "reference index ready" ); diff --git a/crates/reference-index/src/backfill.rs b/crates/reference-index/src/backfill.rs index 2428db1..536c525 100644 --- a/crates/reference-index/src/backfill.rs +++ b/crates/reference-index/src/backfill.rs @@ -20,6 +20,8 @@ use reth_provider::{BlockReader, HeaderProvider}; use reth_storage_api::{BlockNumReader, TransactionVariant}; use tracing::{debug, info}; +const TARGET: &str = "morph::reference_index"; + /// Determine the first block number at which the Jade hardfork is active. 
/// /// Returns `JADE_NOT_ACTIVE_SENTINEL` (`u64::MAX`) when: @@ -171,7 +173,7 @@ where let jade_first = db.jade_first_block_number()?.unwrap_or(start); info!( - target: "morph::reference_index", + target: TARGET, start, head_at_startup, "starting reference index backfill" ); @@ -217,7 +219,7 @@ where tx.commit()?; debug!( - target: "morph::reference_index", + target: TARGET, batch_start = current, batch_end, is_last_batch, @@ -225,13 +227,9 @@ where ); current = batch_end + 1; - - if !is_last_batch { - std::thread::sleep(std::time::Duration::from_millis(10)); - } } - info!(target: "morph::reference_index", "reference index backfill complete"); + info!(target: TARGET, "reference index backfill complete"); Ok(()) } @@ -265,7 +263,7 @@ where set_backfill_state(&tx, BackfillState::NotStarted)?; tx.commit()?; info!( - target: "morph::reference_index", + target: TARGET, jade_first_block = new, "Jade has activated; resetting backfill to index from first Jade block" ); diff --git a/crates/reference-index/src/db.rs b/crates/reference-index/src/db.rs index 664731c..3c37bee 100644 --- a/crates/reference-index/src/db.rs +++ b/crates/reference-index/src/db.rs @@ -242,7 +242,6 @@ impl ReferenceIndexDb { .get::(IndexedBlockKey { block_number })? .map(|v| v.0)) } - } #[cfg(test)] diff --git a/crates/reference-index/src/reconcile.rs b/crates/reference-index/src/reconcile.rs index e24e567..204cce6 100644 --- a/crates/reference-index/src/reconcile.rs +++ b/crates/reference-index/src/reconcile.rs @@ -17,6 +17,8 @@ use reth_provider::{BlockHashReader, BlockReader, HeaderProvider}; use reth_storage_api::TransactionVariant; use tracing::{debug, info}; +const TARGET: &str = "morph::reference_index"; + /// Run the startup reconciliation pass. 
/// /// Steps: @@ -61,7 +63,7 @@ where let canonical_hash = provider.block_hash(number)?; if Some(indexed) != canonical_hash { debug!( - target: "morph::reference_index", + target: TARGET, number, ?indexed, ?canonical_hash, @@ -75,7 +77,7 @@ where // ── Step B: apply reorg if detected ────────────────────────────────────── let rebuild_start = if let Some(fh) = fork_height { info!( - target: "morph::reference_index", + target: TARGET, fork_height = fh, old_indexed_to = indexed_to, "offline reorg detected; rolling back index" @@ -97,7 +99,7 @@ where // ── Step C: suffix gap fill ─────────────────────────────────────────────── if rebuild_start <= current_head { info!( - target: "morph::reference_index", + target: TARGET, rebuild_start, current_head, "filling reference index suffix gap" diff --git a/crates/reference-index/src/writer.rs b/crates/reference-index/src/writer.rs index 5e163bd..9ce3710 100644 --- a/crates/reference-index/src/writer.rs +++ b/crates/reference-index/src/writer.rs @@ -159,7 +159,6 @@ pub fn set_jade_first_block_number( Ok(()) } - #[cfg(test)] mod tests { use super::*; diff --git a/crates/rpc/src/morph/handler.rs b/crates/rpc/src/morph/handler.rs index 3f32df3..6616ca0 100644 --- a/crates/rpc/src/morph/handler.rs +++ b/crates/rpc/src/morph/handler.rs @@ -11,6 +11,8 @@ use morph_reference_index::{ use reth_storage_api::BlockNumReader; use tracing; +const TARGET: &str = "morph::reference_index_rpc"; + // ── Context ────────────────────────────────────────────────────────────────── /// `morph_` namespace context. All dependencies are required; no `Option<>`. @@ -60,7 +62,8 @@ impl MorphRpcServer .ctx .provider .best_block_number() - .map_err(|e| to_rpc_error(ReferenceIndexError::Other(eyre::eyre!(e))))?; + .map_err(ReferenceIndexError::from) + .map_err(to_rpc_error)?; self.ctx .reference_index @@ -90,7 +93,7 @@ fn to_rpc_error(error: ReferenceIndexError) -> ErrorObjectOwned { // the wire so Database/Provider/Other error strings don't leak. 
other => { tracing::error!( - target: "morph::reference_index_rpc", + target: TARGET, error = %other, "reference index internal error" ); From 9182af663d26a8ec4208935aa9fa62c6822acd04 Mon Sep 17 00:00:00 2001 From: panos Date: Sat, 9 May 2026 16:11:15 +0800 Subject: [PATCH 14/15] fix(ci): satisfy clippy and cargo-deny on PR #106 - drop redundant `db.clone()` in reference_index integration test - remove RUSTSEC-2026-0002 from advisory ignore list (no longer matched) --- crates/node/tests/it/reference_index.rs | 2 +- deny.toml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/node/tests/it/reference_index.rs b/crates/node/tests/it/reference_index.rs index 42eeae9..a54a1c1 100644 --- a/crates/node/tests/it/reference_index.rs +++ b/crates/node/tests/it/reference_index.rs @@ -73,7 +73,7 @@ async fn reference_index_finds_single_morph_tx() -> eyre::Result<()> { let dir = TempDir::new()?; let db = open_and_backfill_index(&node.inner.provider, &dir).await; - let reader = ReferenceIndexReader::new(db.clone(), DEFAULT_LAG_THRESHOLD); + let reader = ReferenceIndexReader::new(db, DEFAULT_LAG_THRESHOLD); let canonical_tip = node.inner.provider.best_block_number()?; let query = ReferenceQuery::new(reference, None, None).unwrap(); let results = reader.query(query, canonical_tip)?; diff --git a/deny.toml b/deny.toml index 9000cea..b88a46b 100644 --- a/deny.toml +++ b/deny.toml @@ -8,9 +8,6 @@ ignore = [ "RUSTSEC-2024-0436", # https://rustsec.org/advisories/RUSTSEC-2025-0141 bincode is unmaintained "RUSTSEC-2025-0141", - # https://rustsec.org/advisories/RUSTSEC-2026-0002 lru 0.12.x unsound IterMut - # pinned by reth fork at 0.12.5, fix requires 0.16.3 (semver-incompatible) - "RUSTSEC-2026-0002", # https://rustsec.org/advisories/RUSTSEC-2026-0097 rand unsound with custom logger # pinned transitively via reth; no fix available upstream yet "RUSTSEC-2026-0097", From a02d289ea61bdecc5bb96407cf6e7d007978deb4 Mon Sep 17 00:00:00 2001 From: panos Date: Sat, 9 May 
2026 16:23:12 +0800 Subject: [PATCH 15/15] fix(ci): resolve cargo-deny advisories on PR #106 Update transitive dependencies where cargo can select patched releases, and document the hickory-proto advisory exceptions that remain pinned by the current reth dependency graph. Constraint: PR #106 cargo-deny failure blocks merge Rejected: Broadly ignore all new advisories | patched multihash and rustls-webpki versions are available Confidence: medium Scope-risk: narrow Not-tested: Skipped additional verification per user request --- Cargo.lock | 18 ++++-------------- deny.toml | 6 ++++++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3fcf443..e3363e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2085,15 +2085,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "core2" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" -dependencies = [ - "memchr", -] - [[package]] name = "cpufeatures" version = "0.2.17" @@ -5117,11 +5108,10 @@ dependencies = [ [[package]] name = "multihash" -version = "0.19.3" +version = "0.19.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b430e7953c29dd6a09afc29ff0bb69c6e306329ee6794700aee27b76a1aea8d" +checksum = "577c63b00ad74d57e8c9aa870b5fccebf2fd64a308a5aee9f1bb88e4aea19447" dependencies = [ - "core2", "unsigned-varint", ] @@ -9527,9 +9517,9 @@ checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" 
dependencies = [ "ring", "rustls-pki-types", diff --git a/deny.toml b/deny.toml index b88a46b..8a1244e 100644 --- a/deny.toml +++ b/deny.toml @@ -15,6 +15,12 @@ ignore = [ "RUSTSEC-2026-0098", # https://rustsec.org/advisories/RUSTSEC-2026-0099 rustls-webpki wildcard name constraints "RUSTSEC-2026-0099", + # https://rustsec.org/advisories/RUSTSEC-2026-0118 hickory-proto NSEC3 validation loop + # pinned transitively via reth-dns-discovery -> hickory-resolver ^0.25; no 0.25 fix is available + "RUSTSEC-2026-0118", + # https://rustsec.org/advisories/RUSTSEC-2026-0119 hickory-proto O(n^2) name compression + # fixed in hickory-proto 0.26.1, but hickory-resolver ^0.25 cannot select it + "RUSTSEC-2026-0119", ] # This section is considered when running `cargo deny check bans`.