diff --git a/Cargo.lock b/Cargo.lock index c2e1fc00702c..f90f2434bb91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7456,6 +7456,7 @@ dependencies = [ "ic-crypto-test-utils-reproducible-rng", "ic-crypto-tree-hash", "ic-crypto-tree-hash-test-utils", + "ic-utils 0.9.0", "itertools 0.12.1", "leb128", "proptest", diff --git a/rs/canonical_state/src/lazy_tree_conversion.rs b/rs/canonical_state/src/lazy_tree_conversion.rs index aa85cf097c3d..3e8a6d7d7181 100644 --- a/rs/canonical_state/src/lazy_tree_conversion.rs +++ b/rs/canonical_state/src/lazy_tree_conversion.rs @@ -9,9 +9,10 @@ use crate::{ }; use LazyTree::Blob; use ic_canonical_state_tree_hash::{ - hash_tree::HashTree, + hash_tree::{HashTree, HashTreeError, hash_lazy_tree}, lazy_tree::{ - Lazy, LazyFork, LazyTree, blob, fork, materialize::materialize_partial, num, string, + Lazy, LazyFork, LazyTree, SubtreeExpander, SubtreeSource, blob, fork, + materialize::materialize_partial, num, string, }, }; use ic_crypto_tree_hash::{Label, Witness, sparse_labeled_tree_from_paths}; @@ -786,7 +787,7 @@ const CANISTER_NO_MODULE_LABELS: [&[u8]; 1] = [CONTROLLERS_LABEL]; #[derive(Clone)] struct CanisterFork<'a> { - canister: &'a CanisterState, + canister: &'a Arc, version: CertificationVersion, } @@ -849,6 +850,54 @@ impl<'a> LazyFork<'a> for CanisterFork<'a> { fn len(&self) -> usize { self.valid_labels().len() } + + /// A canister's certified subtree is stored as a reusable stub identified by + /// the backing `Arc` and the version-specific expander. An + /// unchanged canister keeps the same `Arc` (copy-on-write) and the same + /// expander, so its precomputed digest is reused from the baseline; any + /// mutation or version change yields a mismatched [`SubtreeSource`] and a + /// rebuild. 
+ fn subtree_source(&self) -> Option { + Some(SubtreeSource::new( + self.canister, + select_canister_expander(self.version), + )) + } +} + +/// Rebuilds a canister's stubbed [subtree](`NodeKind::Stub`) for witness +/// generation, by recovering the `Arc` from the stub's +/// [`SubtreeSource`] and traversing its [`CanisterFork`]. +/// +/// The certification version (which the canonical encoding depends on) is baked +/// in as the const parameter `V`, so the stored function pointer alone fully +/// determines the expansion — see [`select_canister_expander`]. +fn expand_canister(source: &SubtreeSource) -> Result { + let canister = source.downcast::(); + let version = CertificationVersion::try_from(V) + .expect("const version parameter is a valid certification version"); + // `canister` (and thus the borrow below) outlives `hash_lazy_tree`, which + // returns an owned `HashTree`; no borrow escapes. + hash_lazy_tree(&fork(CanisterFork { + canister: &canister, + version, + })) +} + +/// Selects the [`expand_canister`] monomorphization for `version`, so the +/// resulting [`SubtreeExpander`] function pointer carries the version with it +/// (rather than replicating it in every stub). 
+fn select_canister_expander(version: CertificationVersion) -> SubtreeExpander { + match version { + CertificationVersion::V19 => expand_canister::<{ CertificationVersion::V19 as u32 }>, + CertificationVersion::V20 => expand_canister::<{ CertificationVersion::V20 as u32 }>, + CertificationVersion::V21 => expand_canister::<{ CertificationVersion::V21 as u32 }>, + CertificationVersion::V22 => expand_canister::<{ CertificationVersion::V22 as u32 }>, + CertificationVersion::V23 => expand_canister::<{ CertificationVersion::V23 as u32 }>, + CertificationVersion::V24 => expand_canister::<{ CertificationVersion::V24 as u32 }>, + CertificationVersion::V25 => expand_canister::<{ CertificationVersion::V25 as u32 }>, + CertificationVersion::V26 => expand_canister::<{ CertificationVersion::V26 as u32 }>, + } } fn api_boundary_nodes_as_tree( diff --git a/rs/canonical_state/tree_hash/BUILD.bazel b/rs/canonical_state/tree_hash/BUILD.bazel index 058109dcb4b2..ff93981289d7 100644 --- a/rs/canonical_state/tree_hash/BUILD.bazel +++ b/rs/canonical_state/tree_hash/BUILD.bazel @@ -10,6 +10,7 @@ rust_library( deps = [ # Keep sorted. 
"//rs/crypto/tree_hash", + "//rs/utils", "@crate_index//:itertools", "@crate_index//:leb128", "@crate_index//:scoped_threadpool", diff --git a/rs/canonical_state/tree_hash/Cargo.toml b/rs/canonical_state/tree_hash/Cargo.toml index c3793bff62ad..1594549c40ba 100644 --- a/rs/canonical_state/tree_hash/Cargo.toml +++ b/rs/canonical_state/tree_hash/Cargo.toml @@ -8,6 +8,7 @@ documentation.workspace = true [dependencies] ic-crypto-tree-hash = { path = "../../crypto/tree_hash" } +ic-utils = { path = "../../utils" } itertools = { workspace = true } leb128 = "0.2.1" scoped_threadpool = "0.1.*" diff --git a/rs/canonical_state/tree_hash/src/hash_tree.rs b/rs/canonical_state/tree_hash/src/hash_tree.rs index edf349be8afe..2aff6720e9da 100644 --- a/rs/canonical_state/tree_hash/src/hash_tree.rs +++ b/rs/canonical_state/tree_hash/src/hash_tree.rs @@ -1,8 +1,9 @@ -use crate::lazy_tree::{LazyFork, LazyTree}; +use crate::lazy_tree::{LazyTree, SubtreeSource}; use crypto::WitnessGenerationError; use ic_crypto_tree_hash::{ self as crypto, Digest, Label, LabeledTree, WitnessBuilder, hasher::Hasher, }; +use ic_utils::iter::left_outer_join; use itertools::izip; use std::fmt; use std::iter::repeat_with; @@ -16,6 +17,19 @@ const NUMBER_OF_CERTIFICATION_THREADS: u32 = 16; /// the depth of the lazy tree. const MAX_RECURSION_DEPTH: u32 = 128; +/// A fork with fewer than this many (expensive to build) children is always +/// built sequentially: it is too small for the thread pool to pay for itself. +pub const PARALLEL_MIN_CHILDREN: usize = 1000; + +/// Forks start being built sequentially. In the meantime, we track how many +/// children have been materialized and hashed — as opposed to cheaply reused +/// from the baseline. +/// +/// Once we've sampled at least this many children, if the projected number of +/// expensively built children exceeds `PARALLEL_MIN_CHILDREN`, we switch to +/// building in parallel. 
+const ADAPTIVE_WARMUP_CHILDREN: usize = 500; + /// SHA256 of the domain separator "ic-hashtree-empty" const EMPTY_HASH: Digest = Digest([ 0x4e, 0x3e, 0xd3, 0x5c, 0x4e, 0x2d, 0x1e, 0xe8, 0x99, 0x96, 0x48, 0x3f, 0xb6, 0x26, 0x0a, 0x64, @@ -29,13 +43,14 @@ const EMPTY_LEAF_HASH: Digest = Digest([ 0xc0, 0x2a, 0x23, 0xa5, 0x1e, 0x08, 0x98, 0xbc, 0x2c, 0x4e, 0x32, 0x3f, 0xce, 0x0e, 0x62, 0x2c, ]); -/// 30 LSBs are used to store the index -const INDEX_MASK: u32 = 0x3fff_ffff; -/// 2 MSBs are used to store the node kind -const KIND_MASK: u32 = 0xc000_0000; -const LEAF_KIND: u32 = 0x4000_0000; -const NODE_KIND: u32 = 0x8000_0000; -const FORK_KIND: u32 = 0xc000_0000; +/// 29 LSBs are used to store the index +const INDEX_MASK: u32 = 0x1fff_ffff; +/// 3 MSBs are used to store the node kind +const KIND_MASK: u32 = 0xe000_0000; +const LEAF_KIND: u32 = 0x2000_0000; +const NODE_KIND: u32 = 0x4000_0000; +const FORK_KIND: u32 = 0x6000_0000; +const STUB_KIND: u32 = 0x8000_0000; #[derive(Copy, Clone, Eq, PartialEq, Debug)] enum NodeKind { @@ -43,6 +58,12 @@ enum NodeKind { Fork, Leaf, Node, + /// A subtree reduced to a single root digest plus the [`SubtreeSource`] (source + /// `Arc`) that it was built from. When the actual subtree is needed for a + /// witness, it is materialized on demand from the source. When an unchanged + /// subtree (equal `SubtreeSource` and certification version) is found in a + /// baseline tree, its digest is reused instead of being recomputed. + Stub, } /// The position of a node in the HashTree data structure. 
@@ -69,6 +90,7 @@ impl fmt::Debug for NodeId { NodeKind::Fork => write!(f, "Fork({}, {})", self.bucket(), self.index()), NodeKind::Leaf => write!(f, "Leaf({}, {})", self.bucket(), self.index()), NodeKind::Node => write!(f, "Node({}, {})", self.bucket(), self.index()), + NodeKind::Stub => write!(f, "Stub({}, {})", self.bucket(), self.index()), } } } @@ -128,6 +150,21 @@ impl NodeId { } } + /// Constructs a node ID for a new `Stub` with the specified index. + #[inline] + fn stub(bucket: usize, idx: usize) -> Result { + if idx > INDEX_MASK as usize { + Err(HashTreeError::IndexOverflow) + } else { + Ok(Self { + bucket: bucket + .try_into() + .map_err(|_| HashTreeError::IndexOverflow)?, + index_and_kind: STUB_KIND | idx as u32, + }) + } + } + /// Returns the component kind of this node. #[inline] fn kind(self) -> NodeKind { @@ -136,6 +173,7 @@ impl NodeId { FORK_KIND => NodeKind::Fork, NODE_KIND => NodeKind::Node, LEAF_KIND => NodeKind::Leaf, + STUB_KIND => NodeKind::Stub, _ => NodeKind::Empty, } } @@ -210,17 +248,18 @@ impl NodeIndexRange { /// /// In this representation, the identifier of a node are two 32 bit unsigned /// integers, where the first number indexes into the (outer) vector and for -/// the second number , the 2 most significant bits are used to indicate the +/// the second number , the 3 most significant bits are used to indicate the /// type of the node: /// -/// * (0,00) is an empty tree. -/// * (0,01) is a leaf. -/// * (0,10) is a labeled node. -/// * (0,11) is a fork. +/// * (0,000) is an empty tree. +/// * (0,001) is a leaf. +/// * (0,010) is a labeled node. +/// * (0,011) is a fork. +/// * (0,100) is a reusable stub. /// -/// This means that the tree can store at most 2^30 nodes of the same type. As +/// This means that the tree can store at most 2^29 nodes of the same type. As /// each tree node has a 32-byte hash associated with it, the tree needs to -/// occupy at least 32 GiB of data before the index overflows. 
+/// occupy at least 16 GiB of data before the index overflows. /// /// [1]: https://en.wikipedia.org/wiki/AoS_and_SoA #[derive(Clone, Debug)] @@ -284,6 +323,33 @@ pub struct HashTree { /// INVARIANT: bucket ≤ node_labels.len() /// index_range.0 <= index_range.1 <= node_labels[bucket].len() node_children_labels_ranges: Vec>, + + /// (i,j)-th element of this array contains the stub with ID `NodeId::stub(i,j)`: + /// the subtree's root digest plus the [`SubtreeSource`] it was built from. The + /// subtree's contents are not materialized; when needed for building a witness + /// they are rebuilt on demand from the `SubtreeSource` (see + /// [`HashTree::witness`]). + stubs: Vec>, +} + +/// A reusable subtree collapsed to a single digest ("stub"), stored in a +/// [`NodeKind::Stub`] node. +/// +/// Holds one `Arc` and one function pointer (inside the [`SubtreeSource`]) plus +/// a cheap [`Digest`], so it can be stored inline, avoiding extra allocation +/// and/or indirection. +#[derive(Clone, Debug)] +struct StubNode { + /// The subtree's root digest. Its contents are not materialized; they are + /// rebuilt on demand via [`SubtreeSource::expand`] during witness generation. + digest: Digest, + + /// The source that this stub was built from (paired with its expander), used + /// both to detect that an unchanged subtree can be reused from a baseline (by + /// source identity and certification version) and to rebuild it for witnesses. + /// Holds an `Arc` into the source, keeping it alive so the identity cannot be + /// recycled (no ABA) and the source stays available for expansion. 
+ source: SubtreeSource, } impl HashTree { @@ -306,6 +372,7 @@ impl HashTree { node_labels: vec![Default::default()], node_children: vec![Default::default()], node_children_labels_ranges: vec![Default::default()], + stubs: vec![Default::default()], } } @@ -314,9 +381,12 @@ impl HashTree { let leaf_size: usize = self.leaf_digests.iter().map(|bucket| bucket.len()).sum(); let fork_size: usize = self.fork_digests.iter().map(|bucket| bucket.len()).sum(); let node_size: usize = self.node_digests.iter().map(|bucket| bucket.len()).sum(); + let stub_size: usize = self.stubs.iter().map(|bucket| bucket.len()).sum(); - // Since this is for metrics only we don't care about potential overflows - leaf_size + fork_size + node_size + // Since this is for metrics only we don't care about potential overflows. + // Note: each stub is counted as a single node; the nodes of its + // unmaterialized subtree are not counted here. + leaf_size + fork_size + node_size + stub_size } /// Largest index in the `HashTree`. @@ -339,8 +409,32 @@ impl HashTree { .map(|bucket| bucket.len()) .max() .unwrap_or(0); + let stub_size = self + .stubs + .iter() + .map(|bucket| bucket.len()) + .max() + .unwrap_or(0); - leaf_size.max(fork_size).max(node_size) + leaf_size.max(fork_size).max(node_size).max(stub_size) + } + + /// Number of [`NodeKind::Stub`] nodes in this tree. + /// + /// Diagnostics/test only. + #[doc(hidden)] + pub fn stub_count(&self) -> usize { + self.stubs.iter().map(|bucket| bucket.len()).sum() + } + + /// The [`SubtreeSource`] of every [`NodeKind::Stub`] node (in no particular + /// order). Lets tests assert stub source identity (e.g. that reuse is by + /// pointer, not by value). + /// + /// Diagnostics/test only. 
+ #[doc(hidden)] + pub fn stub_sources(&self) -> impl Iterator { + self.stubs.iter().flatten().map(|stub| &stub.source) } /// Note that new forks are always added to fork_digests[0], but in order @@ -369,6 +463,13 @@ impl HashTree { NodeId::leaf(self.bucket_offset, id) } + /// Constructs a new stub (either freshly hashed or reused from a baseline). + fn new_stub(&mut self, digest: Digest, source: SubtreeSource) -> Result { + let idx = self.stubs[0].len(); + self.stubs[0].push(StubNode { digest, source }); + NodeId::stub(self.bucket_offset, idx) + } + /// Preallocates `len` nodes. Makes the new nodes root if the `parent` is /// `Empty`. Returns the [`NodeIndexRange`] to the allocated nodes. fn preallocate_nodes( @@ -429,6 +530,9 @@ impl HashTree { NodeKind::Leaf => { &self.leaf_digests[node_id.bucket() - self.bucket_offset][node_id.index()] } + NodeKind::Stub => { + &self.stubs[node_id.bucket() - self.bucket_offset][node_id.index()].digest + } NodeKind::Empty => &EMPTY_HASH, } } @@ -476,6 +580,7 @@ impl HashTree { self.node_children[bucket][idx], ), NodeKind::Leaf => HashTreeView::Leaf(&self.leaf_digests[bucket][idx]), + NodeKind::Stub => HashTreeView::Stub(&self.stubs[bucket][idx].digest), NodeKind::Empty => HashTreeView::Empty, } } @@ -486,6 +591,9 @@ impl HashTree { } /// Constructs a witness for the specified partial tree. + /// + /// Where the `partial_tree` descends into a [`NodeKind::Stub`] (e.g. into a + /// canister), the subtree is built on demand from its [`SubtreeSource`]. pub fn witness( &self, partial_tree: &LabeledTree>, @@ -623,6 +731,13 @@ impl HashTree { ); B::make_pruned(digest.clone()) } + HashTreeView::Stub(digest) => { + debug_assert!( + false, + "a tree node without children must not be a stub" + ); + B::make_pruned(digest.clone()) + } }); } @@ -666,6 +781,24 @@ impl HashTree { pos: NodeId, t: &LabeledTree>, ) -> Result> { + if pos.kind() == NodeKind::Stub { + // A stub, only storing its root digest. 
+ return match t { + // Requested partial tree descends into the subtree: rebuild it from source and + // continue witness generation there. + LabeledTree::SubTree(children) if !children.is_empty() => { + let expanded = ht.stubs[pos.bucket()][pos.index()] + .source + .expand() + .expect("expanding a stub should not fail"); + go::(&expanded, NodeId::empty(), expanded.root, t) + } + + // Witness only needs the precomputed digest. + _ => Ok(B::make_pruned(ht.digest(pos).clone())), + }; + } + match t { LabeledTree::Leaf(data) => Ok(match ht.view(pos) { HashTreeView::Leaf(_) => B::make_leaf(&data[..]), @@ -674,12 +807,16 @@ impl HashTree { B::make_node(label.clone(), B::make_pruned(ht.digest(child).clone())) } HashTreeView::Fork(digest, _left, _right) => B::make_pruned(digest.clone()), + // Intercepted above. + HashTreeView::Stub(_) => unreachable!(), }), LabeledTree::SubTree(children) if children.is_empty() => Ok(match ht.view(pos) { HashTreeView::Empty => B::make_empty(), HashTreeView::Leaf(digest) => B::make_pruned(digest.clone()), HashTreeView::Fork(digest, _left, _right) => B::make_pruned(digest.clone()), HashTreeView::Node(digest, _label, _child) => B::make_pruned(digest.clone()), + // Intercepted above. + HashTreeView::Stub(_) => unreachable!(), }), LabeledTree::SubTree(children) => children .iter() @@ -710,6 +847,9 @@ impl HashTree { self.node_children.extend(subtree.node_children); self.node_children_labels_ranges .extend(subtree.node_children_labels_ranges); + + // Reusable stubs + self.stubs.extend(subtree.stubs); } } @@ -719,6 +859,15 @@ impl PartialEq for HashTree { fn eq_recursive(ht: &HashTree, ht_root: NodeId, other: &crypto::HashTree) -> bool { ht.digest(ht_root) == other.digest() && match (ht_root.kind(), other) { + // A stub collapses a whole subtree to its root digest. Expand it from its + // source and compare the materialized subtree structurally. 
+ (NodeKind::Stub, _) => { + let expanded = ht.stubs[ht_root.bucket()][ht_root.index()] + .source + .expand() + .expect("expanding a stub should not fail"); + eq_recursive(&expanded, expanded.root, other) + } (NodeKind::Leaf | NodeKind::Empty, crypto::HashTree::Leaf { digest: _ }) => { true } @@ -769,6 +918,8 @@ pub enum HashTreeView<'a> { Leaf(&'a Digest), Fork(&'a Digest, NodeId, NodeId), Node(&'a Digest, &'a Label, NodeId), + /// A subtree reduced to its root digest. + Stub(&'a Digest), } /// Error produced when computing hash trees @@ -780,22 +931,112 @@ pub enum HashTreeError { IndexOverflow, } +/// A cursor into a baseline [`HashTree`] that mirrors the position of the lazy +/// tree being traversed. Used to reuse subtrees with matching [`SubtreeSource`] +/// from a previously built tree, traversed in lockstep with the new tree. +#[derive(Clone, Copy)] +struct BaselineCursor<'a> { + tree: &'a HashTree, + /// The node at this position: `empty` for the root, otherwise the labeled + /// node (`kind() == Node`) reached via the edge leading here. + node: NodeId, +} + +impl<'a> BaselineCursor<'a> { + /// The subtree stored below `self.node` in the baseline tree. + fn subtree_root(&self) -> NodeId { + if self.node == NodeId::empty() { + self.tree.root + } else { + // Sanity check: a complete `HashTree` has no bucket offset. + debug_assert_eq!(self.tree.bucket_offset, 0); + + let bucket = self.node.bucket(); + self.tree.node_children[bucket][self.node.index()] + } + } + + /// If the baseline stored this position as a reusable [`NodeKind::Stub`], + /// returns the stub node. + fn stub(&self) -> Option<&'a StubNode> { + let subtree_root = self.subtree_root(); + if subtree_root.kind() == NodeKind::Stub { + // Sanity check: a complete `HashTree` has no bucket offset. 
+ debug_assert_eq!(self.tree.bucket_offset, 0); + + Some(&self.tree.stubs[subtree_root.bucket()][subtree_root.index()]) + } else { + None + } + } + + /// Streams the children positions as `(label, cursor)` pairs, in label order. + fn children(self) -> impl Iterator)> + 'a { + let tree = self.tree; + let NodeIndexRange { + bucket, + index_range, + } = tree.node_labels_range(self.node); + index_range.map(move |idx| { + let child = NodeId::node(bucket, idx).expect("valid baseline hash tree"); + ( + &tree.node_labels[bucket][idx], + BaselineCursor { tree, node: child }, + ) + }) + } +} + /// Materializes the provided lazy tree and builds its hash tree that can be /// used to produce witnesses. +/// +/// Subtrees that carry a +/// [`LazyFork::subtree_source`](crate::lazy_tree::LazyFork::subtree_source) +/// (e.g. canisters) are collapsed to digest-only [`NodeKind::Stub`] nodes. +/// The resulting tree has the exact same root hash as a fully materialized +/// build; witnesses that descend into a stubbed subtree rebuild it on demand +/// from the [`SubtreeSource`] held in the stub (see [`HashTree::witness`]). pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { + hash_lazy_tree_impl(t, None) +} + +/// Like [`hash_lazy_tree`], but reuses the [`NodeKind::Stub`] nodes of +/// unchanged subtrees from `baseline`. +/// +/// The new lazy tree and the baseline tree are traversed in lockstep (children +/// merge-joined by label). Wherever a child carries a [`SubtreeSource`] equal +/// to the one the baseline stores under the same label, the baseline's stored +/// digest is reused instead of building and hashing the subtree. +/// +/// The result is identical (same root hash, same witnesses) to a full +/// [`hash_lazy_tree`] build, regardless of `baseline`. In particular, a +/// `baseline` built under a different certification version is safe to pass: +/// its subtrees carry a different expander, so none of them are reused (they +/// are simply rebuilt). 
+pub fn hash_lazy_tree_with_baseline( + t: &LazyTree<'_>, + baseline: &HashTree, +) -> Result { + hash_lazy_tree_impl(t, Some(baseline)) +} + +fn hash_lazy_tree_impl( + t: &LazyTree<'_>, + baseline: Option<&HashTree>, +) -> Result { struct SubtreeRoot { children_range: NodeIndexRange, root: NodeId, } // We only initialize thread pools lazily the first time we need them - enum ParStrategy { + enum ParallelismStrategy { Sequential, Concurrent, ConcurrentInPool(scoped_threadpool::Pool), } - impl ParStrategy { + impl ParallelismStrategy { fn pool(&mut self) -> Option<&mut scoped_threadpool::Pool> { match self { Self::Sequential => None, @@ -814,12 +1055,76 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { } } - fn go( + /// Builds one labeled `child` of a fork (linked under `parent`), returning its + /// [`NodeId`] and whether it was expensively (re)built — i.e. materialized — + /// rather than cheaply reused from `baseline`. + /// + /// A `child` that carries a + /// [`LazyFork::subtree_source`](crate::lazy_tree::LazyFork::subtree_source) is + /// collapsed to a digest-only [`NodeKind::Stub`] — its digest reused from + /// `baseline` when the sources are equal (cheap), else rebuilt (expensive). + /// Any other `child` is materialized normally via [`build_tree`] (expensive). + fn build_child( + child: &LazyTree<'_>, + ht: &mut HashTree, + parent: NodeId, + parallelism_strategy: &mut ParallelismStrategy, + recursion_depth: u32, + baseline: Option>, + ) -> Result<(NodeId, bool), HashTreeError> { + if let LazyTree::LazyFork(f) = child + && let Some(source) = f.subtree_source() + { + // This subtree should be stubbed: store a digest-only [`NodeKind::Stub`]. + let (digest, was_built) = match baseline.and_then(|b| b.stub()) { + // Unchanged: the baseline carries an equal `SubtreeSource` — same source + // allocation *and* same expander (hence same certification version) — so its + // digest is reused (cheap). 
+ Some(stub) if stub.source == source => (stub.digest.clone(), false), + + // New, changed, or built under a different version: build the subtree only to + // capture its root digest; if later needed for a witness, it will be rebuilt on + // demand from `source`. + _ => { + let mut child_ht = HashTree::new(); + child_ht.root = build_tree( + child, + &mut child_ht, + NodeId::empty(), + parallelism_strategy, + recursion_depth + 1, + None, + )?; + child_ht.check_invariants(); + (child_ht.root_hash().clone(), true) + } + }; + return Ok((ht.new_stub(digest, source)?, was_built)); + } + + // Materialize non-stubbed child: expensive. + let id = build_tree( + child, + ht, + parent, + parallelism_strategy, + recursion_depth + 1, + baseline, + )?; + Ok((id, true)) + } + + /// Builds the hash tree for `t`, returning the [`NodeId`] of its root. + /// + /// The hash tree is always materialized; collapsing a subtree fork into a + /// digest-only [`NodeKind::Stub`] happens one level up, in [`build_child`]. + fn build_tree( t: &LazyTree<'_>, ht: &mut HashTree, parent: NodeId, - par_strategy: &mut ParStrategy, + parallelism_strategy: &mut ParallelismStrategy, recursion_depth: u32, + baseline: Option>, ) -> Result { if recursion_depth > MAX_RECURSION_DEPTH { return Err(HashTreeError::RecursionTooDeep(MAX_RECURSION_DEPTH)); @@ -863,40 +1168,67 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { } = ht.preallocate_nodes(num_children, parent)?; let mut nodes = Vec::with_capacity(num_children); - // We only use multithreading if the number of children is large. It is generally - // efficient to do so because the children of a given parent are of the same type - // (e.g. everything under `/canisters` is a canister state) and thus require - // similar amounts of work to materialize. + // Build the children sequentially, but watch how many have to be actually built + // (hashed) rather than cheaply reused from the baseline. 
After a warmup, + // extrapolate that rate over the whole fork; if it projects too much work, hand + // the *remaining* children to the thread pool. This covers both stubbed forks + // (where reuse keeps the rate low) and regular forks (where every child is + // materialized; so a large fork always parallelizes). // - // We do not pass the thread pool down after use, so we are not spawning new threads - // in a nested way. - if num_children > 100 && par_strategy.is_concurrent() { - fork_parallel( - par_strategy.pool().unwrap(), + // We only collect the unprocessed tail into a `Vec` if and when we switch; the + // common, all-sequential path uses the `joined` iterator directly. + let may_parallelize = + num_children >= PARALLEL_MIN_CHILDREN && parallelism_strategy.is_concurrent(); + let mut do_parallelize = may_parallelize && baseline.is_none(); + let mut num_processed = 0_usize; + let mut num_built = 0_usize; + + // Merge-join the children with the baseline children (a missing baseline child + // is `None`); each tagged with its preallocated node index. + let mut joined = range.zip(left_outer_join( + f.children(), + baseline.into_iter().flat_map(BaselineCursor::children), + )); + + while !do_parallelize && let Some((i, (label, child, base))) = joined.next() { + let (child, was_built) = build_child( + &child, + ht, + NodeId::node(bucket, i)?, + parallelism_strategy, + recursion_depth, + base, + )?; + + num_built += was_built as usize; + num_processed += 1; + do_parallelize |= may_parallelize + // Beyond the warmup, switch to parallel once the sampled build rate + // (`num_built / num_processed`) projects more than the number of children + // required for parallel processing over all `num_children` (rearranged to avoid + // division). 
+ && num_processed >= ADAPTIVE_WARMUP_CHILDREN + && num_built * num_children >= PARALLEL_MIN_CHILDREN * num_processed; + + let mut h = Hasher::for_domain("ic-hashtree-labeled"); + h.update(label.as_bytes()); + h.update(ht.digest(child).as_bytes()); + ht.node_digests[0][i] = h.finalize(); + ht.node_children[0][i] = child; + ht.node_labels[0][i] = label; + nodes.push(NodeId::node(bucket, i)?); + } + + // Build whatever is left of the children in parallel. + if do_parallelize { + build_fork_parallel( + parallelism_strategy.pool().unwrap(), ht, &mut nodes, - f, recursion_depth, bucket, - &range, + joined.collect(), )?; - } else { - for (i, (label, child)) in range.zip(f.children()) { - let child = go( - &child, - ht, - NodeId::node(bucket, i)?, - par_strategy, - recursion_depth + 1, - )?; - let mut h = Hasher::for_domain("ic-hashtree-labeled"); - h.update(label.as_bytes()); - h.update(ht.digest(child).as_bytes()); - ht.node_digests[0][i] = h.finalize(); - ht.node_children[0][i] = child; - ht.node_labels[0][i] = label; - nodes.push(NodeId::node(bucket, i)?); - } } if nodes.len() == 1 { @@ -928,21 +1260,26 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { } } - /// Does the same as the single-threaded else branch, but using multiple threads - fn fork_parallel( + /// Builds the given `tail` of a fork's children across the thread pool, + /// writing the resulting labeled nodes into `ht` and appending their + /// [`NodeId`]s to `nodes` (in `tail` order). + /// + /// Each `tail` entry is `(i, (label, child, base))`, where `i` is the child's + /// preallocated node index and `base` is its baseline counterpart (already + /// merge-joined by the caller). 
+ #[allow(clippy::type_complexity)] + fn build_fork_parallel( thread_pool: &mut scoped_threadpool::Pool, ht: &mut HashTree, nodes: &mut Vec, - fork_f: &std::sync::Arc, depth: u32, bucket: usize, - range: &Range, + tail: Vec<(usize, (Label, LazyTree<'_>, Option>))>, ) -> Result<(), HashTreeError> { let bucket_offset = ht.node_children.len(); let threads = thread_pool.thread_count() as usize; - let children: Vec<_> = fork_f.children().collect(); debug_assert!(threads > 0); - let per_thread = ((children + let per_thread = ((tail .len() .checked_add(threads) .ok_or(HashTreeError::IndexOverflow)? @@ -953,9 +1290,10 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { let mut roots: Vec> = repeat_with(|| Vec::with_capacity(per_thread)) .take(threads) .collect(); + thread_pool.scoped(|scope| { for (i, (children, subtree, roots)) in izip!( - children.chunks(per_thread), + tail.chunks(per_thread), subtrees.iter_mut(), roots.iter_mut() ) @@ -970,21 +1308,34 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { // lookup based on NodeId. let mut ht = HashTree::new_with_bucket_offset(bucket_offset + i); let mut error: Option = None; - for (_, child) in children { + for (_i, (_label, child, base)) in children { // Since the parent is outside of `ht`, we set the parent to NodeId::empty() - // and fix the link from `root` to the parent later - let root = go( + // and fix the link from `root` to the parent later. A child that carries a + // `subtree_source` is collapsed to a stub here. + // + // A stub has no materialized labeled children of its own, so its + // `children_range` is empty (and is never consulted: stubs are descended into + // via their source during witness generation). 
+ match build_child( child, &mut ht, NodeId::empty(), - &mut ParStrategy::Sequential, - depth + 1, - ); - match root { - Ok(root) => { + // Run with `ParallelismStrategy::Sequential`: besides avoiding nested thread + // pools, this limits each worker's tree to a single bucket, which + // `splice_subtree` relies on to place worker `i` at bucket `bucket_offset + i`. + &mut ParallelismStrategy::Sequential, + depth, + *base, + ) { + Ok((root, _was_built)) => { + let children_range = if root.kind() == NodeKind::Stub { + NodeIndexRange::default() + } else { + ht.root_labels_range.clone() + }; roots.push(SubtreeRoot { root, - children_range: ht.root_labels_range.clone(), + children_range, }); } Err(err) => { @@ -1004,7 +1355,8 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { for subtree in subtrees.into_iter().flatten() { ht.splice_subtree(subtree?); } - for (i, (label, _), root) in izip!(range.clone(), children, roots.into_iter().flatten()) { + for ((i, (label, _child, _base)), root) in tail.into_iter().zip(roots.into_iter().flatten()) + { ht.node_children_labels_ranges[bucket][i] = root.children_range; let mut h = Hasher::for_domain("ic-hashtree-labeled"); h.update(label.as_bytes()); @@ -1017,9 +1369,18 @@ pub fn hash_lazy_tree(t: &LazyTree<'_>) -> Result { Ok(()) } - let mut ht = HashTree::new(); - ht.root = go(t, &mut ht, NodeId::empty(), &mut ParStrategy::Concurrent, 0)?; + let baseline = baseline.map(|tree| BaselineCursor { + tree, + node: NodeId::empty(), + }); + let mut ht = HashTree::new(); + let strategy = &mut ParallelismStrategy::Concurrent; + // The root is always materialized; only *descendants* that carry a + // `subtree_source` are collapsed into stubs (see `build_child`). Building a + // stand-alone subtree is just `hash_lazy_tree` on that subtree's root, which + // is in turn materialized for the same reason. 
+ ht.root = build_tree(t, &mut ht, NodeId::empty(), strategy, 0, baseline)?; ht.check_invariants(); Ok(ht) diff --git a/rs/canonical_state/tree_hash/src/lazy_tree.rs b/rs/canonical_state/tree_hash/src/lazy_tree.rs index fd76d0a3cdc6..bcbf77ebcb49 100644 --- a/rs/canonical_state/tree_hash/src/lazy_tree.rs +++ b/rs/canonical_state/tree_hash/src/lazy_tree.rs @@ -8,6 +8,8 @@ pub mod materialize; use ic_crypto_tree_hash::Label; +use std::any::Any; +use std::fmt; use std::sync::Arc; /// A hash of the tree leaf contents according to the IC interface spec. See @@ -54,6 +56,22 @@ pub trait LazyFork<'a>: Send + Sync { fn is_empty(&self) -> bool { self.len() == 0 } + + /// The source that the subtree rooted at this fork is derived from, including + /// the [`SubtreeExpander`] that rebuilds it; produced iff the subtree should + /// be collapsed to a digest-only, reusable subtree node in the + /// [`HashTree`](crate::hash_tree::HashTree). + /// + /// Defaults to `None` (materialize the subtree inline). Forks that wrap shared, + /// copy-on-write state (e.g. an `Arc`) should override this to + /// return that `Arc`, together with an expander that bakes in the certification + /// version, as a [`SubtreeSource`]. Such subtrees are hashed once and, when an + /// unchanged subtree (same source) is found in a baseline tree, its digest is + /// reused instead of being recomputed. See + /// [`hash_lazy_tree_with_baseline`](crate::hash_tree::hash_lazy_tree_with_baseline). + fn subtree_source(&self) -> Option { + None + } } /// A tree that can lazily expand while it's being traversed. @@ -116,3 +134,94 @@ pub fn follow_path<'a>(t: &LazyTree<'a>, path: &[&[u8]]) -> Option> _ => None, } } + +/// An owned, type-erased handle to the source that a reusable lazy subtree was +/// derived from (e.g. an `Arc`), paired with the +/// [`SubtreeExpander`] that rebuilds the subtree from it. 
+/// +/// The held `Arc` keeps the source allocation alive, so its address cannot be +/// recycled for a different object while the handle exists (no ABA), and the +/// source stays available to [`expand`](Self::expand) the subtree for witnesses. +/// +/// Equality is a conservative reuse-gate, *not* a general-purpose comparison: +/// two `SubtreeSource`s are equal iff they point to the same source allocation +/// **and** carry the same expander. The expander encodes the producer's +/// certification version (baked into a version-specific monomorphization), so +/// equality implies the two subtrees would hash identically. The function +/// pointer comparison ([`std::ptr::fn_addr_eq`]) is best-effort: it may report +/// `false` for two pointers that are in fact the same function, but never `true` +/// for genuinely different ones. The sole consumer (baseline reuse) treats +/// inequality as "rebuild the subtree", so a false negative only costs a +/// recomputation and never compromises correctness. +#[derive(Clone)] +pub struct SubtreeSource { + source: Arc, + expander: SubtreeExpander, +} + +/// Rebuilds a stubbed subtree's [`HashTree`](crate::hash_tree::HashTree) from +/// its type-erased [`SubtreeSource`], by [downcasting](SubtreeSource::downcast) +/// the held `Arc` back to its concrete source and re-materializing it. Used to +/// expand a [`NodeKind::Stub`](crate::hash_tree::HashTree) on demand during +/// witness generation. +/// +/// It is a plain function pointer (not a closure), so the producer of the stub +/// must bake the certification version into it. The pointer alone fully +/// determines the expansion so it can be safely used as a conservative equality +/// gate for subtree reuse. +pub type SubtreeExpander = + fn(&SubtreeSource) -> Result; + +impl SubtreeSource { + /// Creates a handle that shares ownership of the subtree's `source` and can + /// rebuild the subtree from it, via the `expander`. 
+ pub fn new(source: &Arc, expander: SubtreeExpander) -> Self { + let this = Self { + source: Arc::clone(source) as Arc, + expander, + }; + debug_assert!(expander(&this).is_ok()); + this + } + + /// The bare address of the source allocation, used for identity comparison. + fn addr(&self) -> *const () { + Arc::as_ptr(&self.source) as *const () + } + + /// Recovers shared ownership of the source as an `Arc`. Used by a + /// [`SubtreeExpander`] to rebuild the subtree from its source. + /// + /// Panics if this handle was not created from an `Arc`. + pub fn downcast(&self) -> Arc { + Arc::clone(&self.source) + .downcast::() + .unwrap_or_else(|_| { + panic!( + "subtree source is not an Arc<{}>", + std::any::type_name::() + ) + }) + } + + /// Rebuilds the subtree's [`HashTree`](crate::hash_tree::HashTree) from this + /// source, to expand a stub on demand during witness generation. + pub fn expand(&self) -> Result { + (self.expander)(self) + } +} + +impl PartialEq for SubtreeSource { + /// A conservative, false-negative-only reuse-gate; see the type-level note. + fn eq(&self, other: &Self) -> bool { + self.addr() == other.addr() && std::ptr::fn_addr_eq(self.expander, other.expander) + } +} + +impl Eq for SubtreeSource {} + +impl fmt::Debug for SubtreeSource { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SubtreeSource({:p})", self.addr()) + } +} diff --git a/rs/canonical_state/tree_hash/tests/subtree.rs b/rs/canonical_state/tree_hash/tests/subtree.rs new file mode 100644 index 000000000000..96c4f3ad8d1b --- /dev/null +++ b/rs/canonical_state/tree_hash/tests/subtree.rs @@ -0,0 +1,433 @@ +//! Tests for reusable subtree (stub) nodes. +//! +//! When building a [`HashTree`] from a [`LazyTree`], every subtree that carries +//! a [`LazyFork::subtree_source`] (here a per-canister fork, mirroring `CanisterFork` +//! in production) is collapsed to a digest-only [`NodeKind::Stub`]. +//! Such a tree: +//! +//! 
* has the exact same root hash as a fully materialized build, +//! * serves witnesses by expanding the stub on demand from the source `Arc` +//! it holds (via its [`SubtreeExpander`]), with no external source, and +//! * when built with a baseline ([`hash_lazy_tree_with_baseline`]), reuses the +//! stored digest of every unchanged subtree (matched by `SubtreeSource`). + +use ic_canonical_state_tree_hash::hash_tree::{ + HashTree, HashTreeError, PARALLEL_MIN_CHILDREN, hash_lazy_tree, hash_lazy_tree_with_baseline, +}; +use ic_canonical_state_tree_hash::lazy_tree::{LazyFork, LazyTree, SubtreeSource, fork}; +use ic_canonical_state_tree_hash_test_utils::as_lazy; +use ic_crypto_tree_hash::{FlatMap, Label, LabeledTree, MixedHashTree, Witness, flatmap}; +use std::collections::BTreeMap; +use std::sync::Arc; + +const CANISTER_LABEL: &[u8] = b"canister"; +const TIME_LABEL: &[u8] = b"time"; + +/// Number of canisters; `> PARALLEL_MIN_CHILDREN` so that the parallel build +/// path is exercised. +const NUM_CANISTERS: usize = PARALLEL_MIN_CHILDREN * 2; + +const TIME: &[u8] = &[1, 2, 3, 4]; + +fn certified_data(i: usize) -> Vec { + vec![i as u8; 4] +} +fn controllers(i: usize) -> Vec { + format!("controllers-{i}").into_bytes() +} +fn custom_section(i: usize) -> Vec { + format!("section-{i}").into_bytes() +} +fn module_hash(i: usize) -> Vec { + vec![(i % 251) as u8; 32] +} + +fn canister_id_label(i: usize) -> Label { + Label::from(format!("{i:04}")) +} + +/// The certified subtree of a single canister (mirrors the real canonical +/// encoding: certified_data, controllers, metadata, module_hash). +fn canister_subtree(i: usize) -> LabeledTree> { + LabeledTree::SubTree(flatmap! 
{ + Label::from("certified_data") => LabeledTree::Leaf(certified_data(i)), + Label::from("controllers") => LabeledTree::Leaf(controllers(i)), + Label::from("metadata") => LabeledTree::SubTree(flatmap!{ + Label::from("public_section") => LabeledTree::Leaf(custom_section(i)), + }), + Label::from("module_hash") => LabeledTree::Leaf(module_hash(i)), + }) +} + +/// A collection of canisters, each behind its own `Arc` (as in production). +type Canisters = BTreeMap>>>; + +fn canisters() -> Canisters { + (0..NUM_CANISTERS) + .map(|i| (canister_id_label(i), Arc::new(canister_subtree(i)))) + .collect() +} + +/// A `LazyFork` over the certified subtree of a single canister. +/// +/// `subtree_source` mirrors `CanisterFork::subtree_source` in production: it +/// returns the backing `Arc`, so each canister is stored as a self-contained, +/// reusable subtree node. +struct CanisterArcFork<'a> { + canister: &'a Arc>>, +} + +impl<'a> CanisterArcFork<'a> { + fn children_map(&self) -> &'a FlatMap>> { + match &**self.canister { + LabeledTree::SubTree(cs) => cs, + LabeledTree::Leaf(_) => panic!("a canister must be a subtree"), + } + } +} + +impl<'a> LazyFork<'a> for CanisterArcFork<'a> { + fn edge(&self, l: &Label) -> Option> { + self.children_map().get(l).map(as_lazy) + } + + fn labels(&self) -> Box + '_> { + Box::new(self.children_map().keys().iter().cloned()) + } + + fn children(&self) -> Box)> + 'a> { + Box::new( + self.children_map() + .iter() + .map(|(l, t)| (l.clone(), as_lazy(t))), + ) + } + + fn len(&self) -> usize { + self.children_map().len() + } + + fn subtree_source(&self) -> Option { + Some(SubtreeSource::new(self.canister, expand_test_canister)) + } +} + +/// Rebuilds a test canister's stubbed subtree from its `SubtreeSource` (mirrors +/// `expand_canister` in production, minus the certification version). 
+fn expand_test_canister(source: &SubtreeSource) -> Result { + let canister = source.downcast::>>(); + hash_lazy_tree(&canister_fork(&canister)) +} + +/// A `LazyFork` over the `/canister` subtree. +struct CanistersFork<'a> { + canisters: &'a Canisters, +} + +fn canister_fork(arc: &Arc>>) -> LazyTree<'_> { + fork(CanisterArcFork { canister: arc }) +} + +impl<'a> LazyFork<'a> for CanistersFork<'a> { + fn edge(&self, l: &Label) -> Option> { + self.canisters.get(l).map(canister_fork) + } + + fn labels(&self) -> Box + '_> { + Box::new(self.canisters.keys().cloned()) + } + + fn children(&self) -> Box)> + 'a> { + Box::new( + self.canisters + .iter() + .map(|(l, arc)| (l.clone(), canister_fork(arc))), + ) + } + + fn len(&self) -> usize { + self.canisters.len() + } +} + +/// The top-level state fork: `{canister: {...}, time: }`. +struct StateFork<'a> { + canisters: &'a Canisters, + time: &'a [u8], +} + +fn canisters_fork(canisters: &Canisters) -> LazyTree<'_> { + fork(CanistersFork { canisters }) +} + +impl<'a> LazyFork<'a> for StateFork<'a> { + fn edge(&self, l: &Label) -> Option> { + match l.as_bytes() { + CANISTER_LABEL => Some(canisters_fork(self.canisters)), + TIME_LABEL => Some(LazyTree::Blob(self.time, None)), + _ => None, + } + } + + fn labels(&self) -> Box + '_> { + Box::new([Label::from(CANISTER_LABEL), Label::from(TIME_LABEL)].into_iter()) + } + + fn children(&self) -> Box)> + 'a> { + Box::new( + [ + (Label::from(CANISTER_LABEL), canisters_fork(self.canisters)), + (Label::from(TIME_LABEL), LazyTree::Blob(self.time, None)), + ] + .into_iter(), + ) + } + + fn len(&self) -> usize { + 2 + } +} + +/// A `LazyTree` over the whole state. +fn state_tree<'a>(canisters: &'a Canisters, time: &'a [u8]) -> LazyTree<'a> { + fork(StateFork { canisters, time }) +} + +/// Asserts that `tree` produces exactly the same witness (both as +/// `MixedHashTree` and `Witness`) as the `reference` full build. Stubbed +/// subtrees expand themselves from the source `Arc` they hold. 
+fn assert_same_witness(reference: &HashTree, tree: &HashTree, partial: &LabeledTree>) { + let reference_mixed = reference + .witness::(partial) + .expect("reference MixedHashTree"); + let tree_mixed = tree + .witness::(partial) + .expect("MixedHashTree"); + assert_eq!( + reference_mixed, tree_mixed, + "MixedHashTree mismatch for partial {partial:?}" + ); + assert_eq!( + &tree_mixed.digest(), + reference.root_hash(), + "witness digest mismatch for partial {partial:?}" + ); + + let reference_witness = reference + .witness::(partial) + .expect("reference Witness"); + let tree_witness = tree.witness::(partial).expect("Witness"); + assert_eq!( + reference_witness, tree_witness, + "Witness mismatch for partial {partial:?}" + ); +} + +/// Builds a partial tree `{canister: {: inner}}`. +fn canister_query(i: usize, inner: LabeledTree>) -> LabeledTree> { + LabeledTree::SubTree(flatmap! { + Label::from(CANISTER_LABEL) => LabeledTree::SubTree(flatmap!{ + canister_id_label(i) => inner, + }), + }) +} + +/// The full canister subtree of canister `i`, used to request witnesses for +/// every leaf. +fn canister_partial(i: usize) -> LabeledTree> { + canister_query(i, canister_subtree(i)) +} + +#[test] +fn every_canister_is_a_subtree() { + let canisters = canisters(); + let tree = hash_lazy_tree(&state_tree(&canisters, TIME)).unwrap(); + + assert_eq!( + tree.stub_count(), + NUM_CANISTERS, + "every canister should be stored as a stub" + ); +} + +#[test] +fn witnesses_into_canisters_expand_from_source() { + let canisters = canisters(); + let source = state_tree(&canisters, TIME); + let tree = hash_lazy_tree(&source).unwrap(); + + // Whole-canister witnesses across both the sequential and parallel ranges. 
+ for i in [ + 0usize, + 1, + PARALLEL_MIN_CHILDREN - 1, + PARALLEL_MIN_CHILDREN + 1, + NUM_CANISTERS - 1, + ] { + let partial = canister_partial(i); + let mixed = tree + .witness::(&partial) + .expect("witness expanded from source"); + assert_eq!(&mixed.digest(), tree.root_hash()); + // The requested leaves must be present (not pruned). + assert!( + mixed + .lookup(&[ + CANISTER_LABEL, + canister_id_label(i).as_bytes(), + b"module_hash" + ]) + .is_found(), + "expected canister {i} module_hash in the witness" + ); + } + + // A single leaf inside a canister. + let partial = canister_query( + 77, + LabeledTree::SubTree(flatmap! { + Label::from("module_hash") => LabeledTree::Leaf(module_hash(77)), + }), + ); + let mixed = tree.witness::(&partial).unwrap(); + assert_eq!(&mixed.digest(), tree.root_hash()); +} + +#[test] +fn absence_witnesses_expand_from_source() { + let canisters = canisters(); + let source = state_tree(&canisters, TIME); + let tree = hash_lazy_tree(&source).unwrap(); + + // Absent canister id (proven at the `/canister` node, no stub descent). + let partial = LabeledTree::SubTree(flatmap! { + Label::from(CANISTER_LABEL) => LabeledTree::SubTree(flatmap!{ + Label::from("zzzz") => LabeledTree::Leaf(vec![]), + }), + }); + let mixed = tree.witness::(&partial).unwrap(); + assert_eq!(&mixed.digest(), tree.root_hash()); + assert!( + mixed.lookup(&[CANISTER_LABEL, b"zzzz"]).is_absent(), + "expected absence proof, got {mixed:?}" + ); + + // Absent label *inside* a canister (descends into the subtree stub). + for i in [3usize, 110] { + let partial = canister_query( + i, + LabeledTree::SubTree(flatmap! 
{ + Label::from("nonexistent") => LabeledTree::Leaf(vec![]), + }), + ); + let mixed = tree.witness::(&partial).unwrap(); + assert_eq!(&mixed.digest(), tree.root_hash()); + assert!( + mixed + .lookup(&[ + CANISTER_LABEL, + canister_id_label(i).as_bytes(), + b"nonexistent" + ]) + .is_absent(), + "expected absence proof inside canister {i}, got {mixed:?}" + ); + } +} + +/// Building with a baseline yields a tree identical to one built from scratch. +#[test] +fn baseline_build_matches_from_scratch() { + let canisters = canisters(); + let baseline = hash_lazy_tree(&state_tree(&canisters, TIME)).unwrap(); + + // Mutate a single canister (fresh `Arc`) and change `time`; keep the rest. + let mut next = canisters.clone(); + next.insert(canister_id_label(50), Arc::new(canister_subtree(9999))); + let new_time: &[u8] = &[9, 9, 9, 9]; + + let from_scratch = hash_lazy_tree(&state_tree(&next, new_time)).unwrap(); + let with_baseline = + hash_lazy_tree_with_baseline(&state_tree(&next, new_time), &baseline).unwrap(); + + assert_eq!( + from_scratch.root_hash(), + with_baseline.root_hash(), + "baseline build must have the same root hash as a from-scratch build" + ); + + // Witnesses must match between the two builds for the changed canister + // (whose contents are now those of `canister_subtree(9999)`), an unchanged + // canister, and the changed `time` leaf. Stubs expand themselves. + for partial in [ + canister_query(50, canister_subtree(9999)), + canister_partial(7), + LabeledTree::SubTree(flatmap! { + Label::from(TIME_LABEL) => LabeledTree::Leaf(new_time.to_vec()), + }), + ] { + assert_same_witness(&from_scratch, &with_baseline, &partial); + } +} + +/// Building with a baseline (reusing the stored digest of every unchanged +/// canister, rebuilding only the changed one) must produce exactly the same tree +/// as a from-scratch build. Digest reuse is an internal optimization and is not +/// observable in the result. 
+#[test] +fn baseline_build_with_partial_change_matches_from_scratch() { + let canisters = canisters(); + let baseline = hash_lazy_tree(&state_tree(&canisters, TIME)).unwrap(); + + // A `BTreeMap` clone shares the canister `Arc`s; only canister 50 gets a + // fresh `Arc` (a real mutation), so only it is rebuilt. + let mut next = canisters.clone(); + next.insert(canister_id_label(50), Arc::new(canister_subtree(50))); + + let with_baseline = hash_lazy_tree_with_baseline(&state_tree(&next, TIME), &baseline).unwrap(); + let from_scratch = hash_lazy_tree(&state_tree(&next, TIME)).unwrap(); + + assert_eq!(with_baseline.stub_count(), NUM_CANISTERS); + assert_eq!(with_baseline.root_hash(), from_scratch.root_hash()); +} + +/// Whether `a` and `b` hold the same stub [`SubtreeSource`]s by identity: each +/// canister's stub points to the same source `Arc` in both trees. +fn same_stub_sources(a: &HashTree, b: &HashTree) -> bool { + // Stubs are stored in label order in every tree + a.stub_sources().eq(b.stub_sources()) +} + +/// Whether every canister's stub in `a` references a *different* source `Arc` +/// than its counterpart in `b` (i.e. nothing could have been reused by identity). +fn disjoint_stub_sources(a: &HashTree, b: &HashTree) -> bool { + a.stub_count() == b.stub_count() && a.stub_sources().zip(b.stub_sources()).all(|(x, y)| x != y) +} + +/// Reuse is by identity, not by value: replacing every canister with a fresh +/// `Arc` of identical contents skips all digest reuse, yet still yields the same +/// root hash (the canonical encoding depends only on the contents). +#[test] +fn reuse_is_by_identity_not_by_value() { + let canisters = canisters(); + let baseline = hash_lazy_tree(&state_tree(&canisters, TIME)).unwrap(); + + // Rebuilding against the baseline with the *same* `Arc`s: every stub holds the + // very same source allocation as the baseline (identity is preserved). 
+ let unchanged = hash_lazy_tree_with_baseline(&state_tree(&canisters, TIME), &baseline).unwrap(); + assert!(same_stub_sources(&baseline, &unchanged)); + + // Replace *every* canister with a fresh `Arc` of the same contents. + let next: Canisters = (0..NUM_CANISTERS) + .map(|i| (canister_id_label(i), Arc::new(canister_subtree(i)))) + .collect(); + + let with_baseline = hash_lazy_tree_with_baseline(&state_tree(&next, TIME), &baseline).unwrap(); + + // No stub shares a source `Arc` with the baseline, so no digest could have been + // reused (every `Arc` is fresh, so no identity matches). + assert!(disjoint_stub_sources(&baseline, &with_baseline)); + + // Same root hash nonetheless: the canonical encoding depends only on contents. + assert_eq!(with_baseline.root_hash(), baseline.root_hash()); +} diff --git a/rs/state_manager/benches/bench_traversal.rs b/rs/state_manager/benches/bench_traversal.rs index fe65023b79d9..2de20e716075 100644 --- a/rs/state_manager/benches/bench_traversal.rs +++ b/rs/state_manager/benches/bench_traversal.rs @@ -1,8 +1,8 @@ +use criterion::measurement::Measurement; use criterion::{BatchSize, BenchmarkId, Criterion, black_box}; -use criterion_time::ProcessTime; -use ic_base_types::NumBytes; +use ic_base_types::{NumBytes, NumSeconds}; use ic_canonical_state::{lazy_tree_conversion::replicated_state_as_lazy_tree, traverse}; -use ic_canonical_state_tree_hash::hash_tree::hash_lazy_tree; +use ic_canonical_state_tree_hash::hash_tree::{hash_lazy_tree, hash_lazy_tree_with_baseline}; use ic_canonical_state_tree_hash_test_utils::{build_witness_gen, crypto_hash_lazy_tree}; use ic_certification_version::CURRENT_CERTIFICATION_VERSION; use ic_crypto_tree_hash::{FlatMap, Label, LabeledTree, MixedHashTree, WitnessGenerator, flatmap}; @@ -15,7 +15,7 @@ use ic_replicated_state::{ }; use ic_state_manager::labeled_tree_visitor::LabeledTreeVisitor; use ic_state_manager::{stream_encoding::encode_stream_slice, tree_hash::hash_state}; -use 
ic_test_utilities_state::{get_initial_state, get_running_canister}; +use ic_test_utilities_state::{get_initial_state, new_canister_state_with_execution}; use ic_test_utilities_types::{ ids::{canister_test_id, message_test_id, subnet_test_id, user_test_id}, messages::{RequestBuilder, ResponseBuilder}, @@ -30,9 +30,9 @@ use ic_types_cycles::Cycles; use maplit::btreemap; use std::sync::Arc; -fn bench_traversal(c: &mut Criterion) { +fn bench_traversal(c: &mut Criterion) { const NUM_STREAM_MESSAGES: u64 = 1_000; - const NUM_CANISTERS: u64 = 10_000; + const NUM_CANISTERS: u64 = 500_000; const NUM_STATUSES: u64 = 30_000; let subnet_type = SubnetType::Application; @@ -72,7 +72,12 @@ fn bench_traversal(c: &mut Criterion) { }); for i in 0..NUM_CANISTERS { - state.put_canister_state(get_running_canister(canister_test_id(i))); + state.put_canister_state(new_canister_state_with_execution( + canister_test_id(i), + canister_test_id(i).get(), + Cycles::zero(), + NumSeconds::from(1000), + )); } let user_id = user_test_id(1); @@ -135,8 +140,25 @@ fn bench_traversal(c: &mut Criterion) { }); c.bench_function("traverse/hash_tree_new", |b| { + let mut tree = None; b.iter(|| { - black_box(hash_lazy_tree(&replicated_state_as_lazy_tree(&state, height)).unwrap()) + tree = Some(black_box( + hash_lazy_tree(&replicated_state_as_lazy_tree(&state, height)).unwrap(), + )); + }); + std::mem::drop(tree); + }); + + let baseline = hash_lazy_tree(&replicated_state_as_lazy_tree(&state, height)).unwrap(); + c.bench_function("traverse/hash_tree_cached", |b| { + b.iter(|| { + black_box( + hash_lazy_tree_with_baseline( + &replicated_state_as_lazy_tree(&state, height), + &baseline, + ) + .unwrap(), + ) }) }); @@ -278,10 +300,7 @@ fn bench_traversal(c: &mut Criterion) { } fn main() { - let mut c = Criterion::default() - .with_measurement(ProcessTime::UserTime) - .sample_size(20) - .configure_from_args(); + let mut c = Criterion::default().sample_size(20).configure_from_args(); bench_traversal(&mut c); 
c.final_summary(); }