From 5ff9eb25a2c94ca3024fe04ab99ce7940558318d Mon Sep 17 00:00:00 2001 From: Khagan Karimov Date: Wed, 11 Feb 2026 21:04:08 -0700 Subject: [PATCH 1/6] Add struct subtypes --- crates/fuzzing/src/generators/gc_ops/mod.rs | 1 + .../fuzzing/src/generators/gc_ops/mutator.rs | 88 +++--- crates/fuzzing/src/generators/gc_ops/ops.rs | 41 ++- crates/fuzzing/src/generators/gc_ops/scc.rs | 212 +++++++++++++ crates/fuzzing/src/generators/gc_ops/tests.rs | 81 ++++- crates/fuzzing/src/generators/gc_ops/types.rs | 293 +++++++++++++++++- 6 files changed, 666 insertions(+), 50 deletions(-) create mode 100644 crates/fuzzing/src/generators/gc_ops/scc.rs diff --git a/crates/fuzzing/src/generators/gc_ops/mod.rs b/crates/fuzzing/src/generators/gc_ops/mod.rs index 9a492cbee106..5dfd8821b3a3 100644 --- a/crates/fuzzing/src/generators/gc_ops/mod.rs +++ b/crates/fuzzing/src/generators/gc_ops/mod.rs @@ -3,6 +3,7 @@ pub mod limits; pub mod mutator; pub mod ops; +pub mod scc; pub mod types; #[cfg(test)] diff --git a/crates/fuzzing/src/generators/gc_ops/mutator.rs b/crates/fuzzing/src/generators/gc_ops/mutator.rs index 9ff1761777d5..e308bb7f1b1d 100644 --- a/crates/fuzzing/src/generators/gc_ops/mutator.rs +++ b/crates/fuzzing/src/generators/gc_ops/mutator.rs @@ -4,13 +4,14 @@ use crate::generators::gc_ops::ops::{GcOp, GcOps}; use crate::generators::gc_ops::types::{RecGroupId, TypeId}; use mutatis::{Candidates, Context, DefaultMutate, Generate, Mutate, Result as MutResult}; use smallvec::SmallVec; +use std::collections::BTreeMap; /// A mutator for the gc ops. #[derive(Debug)] pub struct GcOpsMutator; impl GcOpsMutator { - // Define a mutation that adds an operation to the ops list. + /// Define a mutation that adds an operation to the ops list. fn add_operation(&mut self, c: &mut Candidates<'_>, ops: &mut GcOps) -> mutatis::Result<()> { if c.shrink() { return Ok(()); @@ -26,7 +27,7 @@ impl GcOpsMutator { Ok(()) } - // Define a mutation that removes an operation from the ops list. 
+ /// Define a mutation that removes an operation from the ops list. fn remove_operation(&mut self, c: &mut Candidates<'_>, ops: &mut GcOps) -> mutatis::Result<()> { if ops.ops.is_empty() { return Ok(()); @@ -40,7 +41,7 @@ impl GcOpsMutator { Ok(()) } - // Define a mutation that adds an empty struct type to an existing (rec ...) group. + /// Define a mutation that adds an empty struct type to an existing (rec ...) group. fn add_new_struct_type_to_rec_group( &mut self, c: &mut Candidates<'_>, @@ -59,15 +60,23 @@ impl GcOpsMutator { .copied() .expect("rec_groups not empty"); let new_tid = ops.types.fresh_type_id(ctx.rng()); - ops.types.insert_empty_struct(new_tid, group_id); + let is_final = (ctx.rng().gen_u32() % 4) == 0; + let keys: Vec = ops.types.type_defs.keys().copied().collect(); + let supertype = if keys.is_empty() { + None + } else { + ctx.rng().choose(&keys).copied() + }; + ops.types + .insert_empty_struct(new_tid, group_id, is_final, supertype); log::debug!("Added empty struct type {new_tid:?} to rec group {group_id:?}"); Ok(()) })?; Ok(()) } - // Define a mutation that removes a struct type from an existing (rec ...). - // It may result in empty rec groups. Empty rec groups are allowed. + /// Define a mutation that removes a struct type from an existing (rec ...). + /// It may result in empty rec groups. Empty rec groups are allowed. fn remove_struct_type_from_rec_group( &mut self, c: &mut Candidates<'_>, @@ -82,6 +91,7 @@ impl GcOpsMutator { .choose(ops.types.type_defs.keys()) .copied() .expect("type_defs not empty"); + ops.types.type_defs.remove(&tid); log::debug!("Removed struct type {tid:?}"); Ok(()) @@ -89,7 +99,7 @@ impl GcOpsMutator { Ok(()) } - // Define a mutation that moves a struct type within an existing rec group. + /// Define a mutation that moves a struct type within an existing rec group. 
fn move_struct_type_within_rec_group( &mut self, c: &mut Candidates<'_>, @@ -154,9 +164,9 @@ impl GcOpsMutator { Ok(()) } - // Define a mutation that moves a struct type from one (rec ...) group to another. - // It will be a different rec group with high probability but it may try - // to move it to the same rec group. + /// Define a mutation that moves a struct type from one (rec ...) group to another. + /// It will be a different rec group with high probability but it may try + /// to move it to the same rec group. fn move_struct_type_between_rec_groups( &mut self, c: &mut Candidates<'_>, @@ -184,7 +194,7 @@ impl GcOpsMutator { Ok(()) } - // Define a mutation that duplicates a (rec ...) group. + /// Define a mutation that duplicates a (rec ...) group. fn duplicate_rec_group( &mut self, c: &mut Candidates<'_>, @@ -197,45 +207,54 @@ impl GcOpsMutator { { return Ok(()); } + c.mutation(|ctx| { - let source_gid = ctx + let source_gid = *ctx .rng() .choose(&ops.types.rec_groups) - .copied() .expect("rec_groups not empty"); + // Collect (TypeId, is_final, supertype) for members of the source group. + let mut members: SmallVec<[(TypeId, bool, Option); 32]> = SmallVec::new(); + for (tid, def) in ops.types.type_defs.iter() { + if def.rec_group == source_gid { + members.push((*tid, def.is_final, def.supertype)); + } + } + // Create a new rec group. let new_gid = ops.types.fresh_rec_group_id(ctx.rng()); ops.types.insert_rec_group(new_gid); - let count = ops - .types - .type_defs - .values() - .filter(|def| def.rec_group == source_gid) - .count(); - - // Skip empty rec groups. - if count == 0 { - return Ok(()); + // Allocate fresh type ids for each member. + // We need to correctly match the supertypes in the new group as well. + // We keep track of the old type ids to new type ids in a map. 
+ let mut old_to_new: BTreeMap = BTreeMap::new(); + for (old_tid, _, _) in &members { + old_to_new.insert(*old_tid, ops.types.fresh_type_id(ctx.rng())); } - // Since our structs are empty, we can just insert them into the new rec group. - // We will update mutators while adding new features to the fuzzer. - for _ in 0..count { + // Insert duplicated defs, rewriting intra-group supertype edges to the cloned ids. + for (old_tid, is_final, supertype) in &members { + // Get the new type id for the old type id. + let new_tid = old_to_new[old_tid]; + + // Map the supertype to the new type id. + // If it has no supertype, we keep it as None. + // If its supertype is in the same group, we map it to the new type id. + // If its supertype is in a different group, we keep it as is. + let mapped_super = supertype.map(|st| *old_to_new.get(&st).unwrap_or(&st)); ops.types - .insert_empty_struct(ops.types.fresh_type_id(ctx.rng()), new_gid); + .insert_empty_struct(new_tid, new_gid, *is_final, mapped_super); } - log::debug!( - "Duplicated rec group {source_gid:?} as new group {new_gid:?} ({count} types)" - ); Ok(()) })?; + Ok(()) } - // Define a mutation that removes a whole (rec ...) group. + /// Define a mutation that removes a whole (rec ...) group. fn remove_rec_group(&mut self, c: &mut Candidates<'_>, ops: &mut GcOps) -> mutatis::Result<()> { if ops.types.rec_groups.len() <= 2 { return Ok(()); @@ -247,16 +266,15 @@ impl GcOpsMutator { .copied() .expect("rec_groups not empty"); - ops.types.type_defs.retain(|_, def| def.rec_group != gid); ops.types.rec_groups.remove(&gid); - log::debug!("Removed rec group {gid:?} and its member types"); + log::debug!("Removed rec group {gid:?}"); Ok(()) })?; Ok(()) } - // Define a mutation that merges two (rec ...) groups. + /// Define a mutation that merges two (rec ...) groups. 
fn merge_rec_groups(&mut self, c: &mut Candidates<'_>, ops: &mut GcOps) -> mutatis::Result<()> { if ops.types.rec_groups.is_empty() || ops.types.rec_groups.len() <= 2 { return Ok(()); @@ -359,7 +377,7 @@ impl GcOpsMutator { }; let k = k_minus_1 + 1; - // Move k distinct members by removing them from `members` as we pick. + // Move k distinct members by removing them from `members as we pick. for _ in 0..k { let Some(i) = ctx.rng().gen_index(members.len()) else { break; diff --git a/crates/fuzzing/src/generators/gc_ops/ops.rs b/crates/fuzzing/src/generators/gc_ops/ops.rs index 205b3dd95634..dccac72e5b35 100644 --- a/crates/fuzzing/src/generators/gc_ops/ops.rs +++ b/crates/fuzzing/src/generators/gc_ops/ops.rs @@ -69,7 +69,8 @@ impl GcOps { ); // 1: "run" - let mut params: Vec = Vec::with_capacity(self.limits.num_params as usize); + let mut params: Vec = + Vec::with_capacity(usize::try_from(self.limits.num_params).unwrap()); for _i in 0..self.limits.num_params { params.push(ValType::EXTERNREF); } @@ -104,24 +105,46 @@ impl GcOps { let struct_type_base: u32 = types.len(); + let type_order: Vec = self.types.sort_types_by_supertype(); + + // Build per-group member lists in `type_order` order (each group's list is a subsequence + // of `type_order`, so within-group supertypes come before subtypes when possible). let mut rec_groups: BTreeMap> = self .types .rec_groups .iter() .copied() - .map(|id| (id, Vec::new())) + .map(|g| (g, Vec::new())) .collect(); - for (id, ty) in self.types.type_defs.iter() { - rec_groups.entry(ty.rec_group).or_default().push(*id); + for &id in &type_order { + let g = self.types.type_defs[&id].rec_group; + rec_groups.get_mut(&g).unwrap().push(id); } + // Topological sort of rec-groups based on cross-group supertype edges. + let group_order: Vec = self.types.sort_rec_groups_topo(&rec_groups); + + // Build the actual emission order and index map from it. 
+ let encoding_order: Vec = group_order + .iter() + .flat_map(|g| rec_groups[g].iter().copied()) + .collect(); + + // Build the index map from the encoding order. + let type_ids_to_index: BTreeMap = encoding_order + .iter() + .copied() + .enumerate() + .map(|(i, id)| (id, struct_type_base + u32::try_from(i).unwrap())) + .collect(); + let encode_ty_id = |ty_id: &TypeId| -> wasm_encoder::SubType { let def = &self.types.type_defs[ty_id]; match &def.composite_type { CompositeType::Struct(StructType {}) => wasm_encoder::SubType { - is_final: true, - supertype_idx: None, + is_final: def.is_final, + supertype_idx: def.supertype.map(|st| type_ids_to_index[&st]), composite_type: wasm_encoder::CompositeType { inner: wasm_encoder::CompositeInnerType::Struct(wasm_encoder::StructType { fields: Box::new([]), @@ -136,10 +159,12 @@ impl GcOps { let mut struct_count = 0; - for type_ids in rec_groups.values() { + // Emit in topological order of rec-groups. + for g in &group_order { + let type_ids = &rec_groups[g]; let members: Vec = type_ids.iter().map(encode_ty_id).collect(); types.ty().rec(members); - struct_count += type_ids.len() as u32; + struct_count += u32::try_from(type_ids.len()).unwrap(); } let typed_fn_type_base: u32 = struct_type_base + struct_count; diff --git a/crates/fuzzing/src/generators/gc_ops/scc.rs b/crates/fuzzing/src/generators/gc_ops/scc.rs new file mode 100644 index 000000000000..2e605a1298f4 --- /dev/null +++ b/crates/fuzzing/src/generators/gc_ops/scc.rs @@ -0,0 +1,212 @@ +//! Strongly-connected components (Tarjan, iterative). +//! +//! Same engineering as Wasmtime's inliner SCC: +//! - O(V+E) +//! - iterative (no recursion) +//! - components stored as `Vec>` into a flat node buffer +//! - deterministic via ordered containers (BTreeMap/BTreeSet) +//! This is a modified version of Wasmtime's inliner SCC. +//! 
Please see: https://github.com/bytecodealliance/wasmtime/blob/main/crates/wasmtime/src/compile/scc.rs + +use std::{ + collections::{BTreeMap, BTreeSet}, + ops::Range, +}; + +use crate::generators::gc_ops::types::RecGroupId; + +/// SCC results: `components` maps each SCC to a slice range in `component_nodes`. +pub struct StronglyConnectedComponents { + components: Vec>, + component_nodes: Vec, +} + +impl StronglyConnectedComponents { + /// Find SCCs in the given graph. + pub fn new(nodes: I, successors: F) -> Self + where + I: IntoIterator, + F: Fn(RecGroupId) -> S, + S: Iterator, + { + let nodes = nodes.into_iter(); + + // The resulting components and their nodes. + let mut component_nodes: Vec = vec![]; + let mut components: Vec> = vec![]; + + // The DFS index counter. + let mut index = NonMaxU32::default(); + + // DFS index and lowlink for each RecGroupId. + // Because RecGroupId is not dense, we use BTreeMap. + let mut indices: BTreeMap = BTreeMap::new(); + let mut lowlinks: BTreeMap = BTreeMap::new(); + + // SCC stack and membership. + let mut stack: Vec = vec![]; + let mut on_stack: BTreeSet = BTreeSet::new(); + + let mut dfs = Dfs::new(nodes); + while let Some(event) = dfs.next( + &successors, + // seen? 
+ |node| indices.contains_key(&node), + ) { + match event { + DfsEvent::Pre(node) => { + debug_assert!(!indices.contains_key(&node)); + debug_assert!(!lowlinks.contains_key(&node)); + + indices.insert(node, index); + lowlinks.insert(node, index); + + index = NonMaxU32::new(index.get() + 1).unwrap(); + + stack.push(node); + let inserted = on_stack.insert(node); + debug_assert!(inserted); + } + + DfsEvent::AfterEdge(node, succ) => { + let node_idx = indices[&node]; + let node_low = lowlinks[&node]; + let succ_idx = indices[&succ]; + let succ_low = lowlinks[&succ]; + + debug_assert!(node_low <= node_idx); + debug_assert!(succ_low <= succ_idx); + + if on_stack.contains(&succ) { + let new_low = std::cmp::min(node_low, succ_low); + lowlinks.insert(node, new_low); + } + } + + DfsEvent::Post(node) => { + let node_idx = indices[&node]; + let node_low = lowlinks[&node]; + + if node_idx == node_low { + // Node is SCC root. Pop until node. + let start = u32::try_from(component_nodes.len()).unwrap(); + + loop { + let v = stack.pop().unwrap(); + let removed = on_stack.remove(&v); + debug_assert!(removed); + + component_nodes.push(v); + + if v == node { + break; + } + } + + let end = u32::try_from(component_nodes.len()).unwrap(); + components.push(start..end); + } + } + } + } + + Self { + components, + component_nodes, + } + } + + fn node_range(&self, range: Range) -> &[RecGroupId] { + let start = usize::try_from(range.start).unwrap(); + let end = usize::try_from(range.end).unwrap(); + &self.component_nodes[start..end] + } + + /// Iterate SCCs. + pub fn iter(&self) -> impl ExactSizeIterator + '_ { + self.components.iter().map(|r| self.node_range(r.clone())) + } +} + +/// An iterative depth-first traversal. 
+struct Dfs { + stack: Vec, +} + +impl Dfs { + fn new(roots: impl IntoIterator) -> Self { + Self { + stack: roots.into_iter().map(DfsEvent::Pre).collect(), + } + } + + fn next( + &mut self, + successors: impl Fn(RecGroupId) -> S, + seen: impl Fn(RecGroupId) -> bool, + ) -> Option + where + S: Iterator, + { + loop { + let event = self.stack.pop()?; + + if let DfsEvent::Pre(node) = event { + if seen(node) { + continue; + } + + let succs = successors(node); + let (min, max) = succs.size_hint(); + let est = max.unwrap_or_else(|| 2 * min); + + self.stack.reserve(2 * est + 1); + + self.stack.push(DfsEvent::Post(node)); + for succ in succs { + self.stack.push(DfsEvent::AfterEdge(node, succ)); + if !seen(succ) { + self.stack.push(DfsEvent::Pre(succ)); + } + } + } + + return Some(event); + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum DfsEvent { + Pre(RecGroupId), + AfterEdge(RecGroupId, RecGroupId), + Post(RecGroupId), +} + +mod non_max { + use std::num::NonZeroU32; + + #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct NonMaxU32(NonZeroU32); + + impl Default for NonMaxU32 { + fn default() -> Self { + Self::new(0).unwrap() + } + } + + impl NonMaxU32 { + pub fn new(x: u32) -> Option { + if x == u32::MAX { + None + } else { + Some(Self(unsafe { NonZeroU32::new_unchecked(x + 1) })) + } + } + + pub fn get(&self) -> u32 { + self.0.get() - 1 + } + } +} +use non_max::NonMaxU32; diff --git a/crates/fuzzing/src/generators/gc_ops/tests.rs b/crates/fuzzing/src/generators/gc_ops/tests.rs index 57f3073d8c5c..503bcd55b55c 100644 --- a/crates/fuzzing/src/generators/gc_ops/tests.rs +++ b/crates/fuzzing/src/generators/gc_ops/tests.rs @@ -6,6 +6,7 @@ use crate::generators::gc_ops::{ use mutatis; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; +use std::collections::BTreeMap; use wasmparser; use wasmprinter; @@ -59,7 +60,10 @@ fn test_ops(num_params: u32, num_globals: u32, table_size: u32) -> GcOps { if t.limits.max_rec_groups > 0 { for i in 
0..t.limits.max_types { let gid = RecGroupId(rng.gen_range(0..t.limits.max_rec_groups)); - t.types.insert_empty_struct(TypeId(i), gid); + let is_final = false; + let supertype = None; + t.types + .insert_empty_struct(TypeId(i), gid, is_final, supertype); } } @@ -287,3 +291,78 @@ fn fixup_check_types_and_indexes() -> mutatis::Result<()> { Ok(()) } + +#[test] +fn sort_types_by_supertype_orders_supertype_before_subtype() { + let mut types = Types::new(); + let g = RecGroupId(0); + types.insert_rec_group(g); + + let a = TypeId(0); + let b = TypeId(1); + let c = TypeId(2); + let d = TypeId(3); + + types.insert_empty_struct(a, g, false, Some(b)); // A <: B + types.insert_empty_struct(b, g, false, Some(d)); // B <: D + types.insert_empty_struct(c, g, false, Some(a)); // C <: A + types.insert_empty_struct(d, g, false, None); // D + + let sorted = types.sort_types_by_supertype(); + + // D(3) root, B(1)<:D, A(0)<:B, C(2)<:A => supertype-before-subtype order. + assert_eq!( + sorted, + [TypeId(3), TypeId(1), TypeId(0), TypeId(2)], + "topo order: supertype before subtype" + ); +} + +#[test] +fn merge_rec_groups_via_scc_merges_three_cycle() { + let mut types = Types::new(); + let g0 = RecGroupId(0); + let g1 = RecGroupId(1); + let g2 = RecGroupId(2); + + types.insert_rec_group(g0); + types.insert_rec_group(g1); + types.insert_rec_group(g2); + + // One type per group: + // + // t0 in g0, supertype = t1 (in g1) => g0 depends on g1 + // t1 in g1, supertype = t2 (in g2) => g1 depends on g2 + // t2 in g2, supertype = t0 (in g0) => g2 depends on g0 + // + // This makes a 3-cycle among rec-groups: g0 -> g1 -> g2 -> g0 + // Merge should fuse all into one group (canonical keep = min = g0). + + types.insert_empty_struct(TypeId(0), g0, false, Some(TypeId(1))); + types.insert_empty_struct(TypeId(1), g1, false, Some(TypeId(2))); + types.insert_empty_struct(TypeId(2), g2, false, Some(TypeId(0))); + + // Build RecGroupId -> Vec member lists for merge input. 
+ let mut rec_groups: BTreeMap> = types + .rec_groups + .iter() + .copied() + .map(|g| (g, Vec::new())) + .collect(); + for (id, def) in types.type_defs.iter() { + rec_groups.entry(def.rec_group).or_default().push(*id); + } + + assert_eq!(types.rec_groups.len(), 3); + types.merge_rec_groups_via_scc(&rec_groups); + + // After merge: one group (g0), all three types in it. + assert_eq!(types.rec_groups.len(), 1); + assert!(types.rec_groups.contains(&g0)); + assert!(!types.rec_groups.contains(&g1)); + assert!(!types.rec_groups.contains(&g2)); + + assert_eq!(types.type_defs.get(&TypeId(0)).unwrap().rec_group, g0); + assert_eq!(types.type_defs.get(&TypeId(1)).unwrap().rec_group, g0); + assert_eq!(types.type_defs.get(&TypeId(2)).unwrap().rec_group, g0); +} diff --git a/crates/fuzzing/src/generators/gc_ops/types.rs b/crates/fuzzing/src/generators/gc_ops/types.rs index 34187e1e101c..29b77058df94 100644 --- a/crates/fuzzing/src/generators/gc_ops/types.rs +++ b/crates/fuzzing/src/generators/gc_ops/types.rs @@ -2,8 +2,9 @@ use crate::generators::gc_ops::limits::GcOpsLimits; use crate::generators::gc_ops::ops::GcOp; +use crate::generators::gc_ops::scc::StronglyConnectedComponents; use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, VecDeque}; /// RecGroup ID struct definition. #[derive( @@ -34,6 +35,8 @@ pub enum CompositeType { #[derive(Debug, Serialize, Deserialize)] pub struct SubType { pub(crate) rec_group: RecGroupId, + pub(crate) is_final: bool, + pub(crate) supertype: Option, pub(crate) composite_type: CompositeType, } /// Struct types definition. @@ -52,6 +55,236 @@ impl Types { } } + /// Break cycles in supertype edges within each rec-group by dropping some edges. + pub fn break_type_cycles_in_rec_groups(&mut self) { + // Kill self-edges to avoid cycles. 
+ for (id, def) in self.type_defs.iter_mut() { + if def.supertype == Some(*id) { + def.supertype = None; + } + } + + // Build group -> member list from current truth. + let mut members: BTreeMap> = BTreeMap::new(); + for (id, def) in self.type_defs.iter() { + members.entry(def.rec_group).or_default().push(*id); + } + + // For each group, break cycles in the TypeId supertype graph. + for (_g, ids) in members.iter() { + if ids.len() <= 1 { + continue; + } + + let id_set: BTreeSet = ids.iter().copied().collect(); + + // DFS from each node, if we revisit a node in the + // current path, we found a cycle. Break it by clearing supertype. + let mut visited = BTreeSet::new(); + for &start in ids { + if visited.contains(&start) { + continue; + } + + let mut path = Vec::new(); + let mut path_set = BTreeSet::new(); + let mut cur = start; + + loop { + if path_set.contains(&cur) { + // Found a cycle. Clear supertype to break it. + if let Some(def) = self.type_defs.get_mut(&cur) { + def.supertype = None; + } + break; + } + + if visited.contains(&cur) { + break; + } + + path.push(cur); + path_set.insert(cur); + visited.insert(cur); + + let next = self.type_defs.get(&cur).and_then(|d| d.supertype); + match next { + Some(st) if id_set.contains(&st) => cur = st, + _ => break, + } + } + } + } + } + + /// Get the successors of the given rec-group. + /// It is used to find the SCCs. + fn rec_group_successors<'a>( + &'a self, + rec_groups: &'a BTreeMap>, + g: RecGroupId, + ) -> impl Iterator + 'a { + let mut deps = BTreeSet::::new(); + + for &ty in &rec_groups[&g] { + if let Some(st) = self.type_defs[&ty].supertype { + let h = self.type_defs[&st].rec_group; + if h != g { + deps.insert(h); + } + } + } + + deps.into_iter() + } + + /// Merge rec-groups that participate in dependency cycles. 
+ pub fn merge_rec_groups_via_scc(&mut self, rec_groups: &BTreeMap>) { + let nodes = rec_groups.keys().copied(); + let sccs = + StronglyConnectedComponents::new(nodes, |g| self.rec_group_successors(rec_groups, g)); + + for groups in sccs.iter() { + if groups.len() <= 1 { + continue; + } + + // Deterministic canonical "keep" group. + // Smallest RecGroupId in the SCC. + let keep = *groups.iter().min().unwrap(); + + // Merge every other group into "keep" group by rewriting only the members of that group. + for &g in groups { + if g == keep { + continue; + } + + if let Some(members) = rec_groups.get(&g) { + for &ty in members { + if let Some(def) = self.type_defs.get_mut(&ty) { + def.rec_group = keep; + } + } + } + + // Drop g from the rec-group set. + self.rec_groups.remove(&g); + } + } + + debug_assert!( + self.type_defs + .values() + .all(|d| self.rec_groups.contains(&d.rec_group)), + "after rec-group merge, some type_defs still reference removed rec-groups" + ); + } + + /// Topological sort of rec-groups. 
+ pub fn sort_rec_groups_topo( + &self, + rec_groups: &BTreeMap>, + ) -> Vec { + // deps[g] = set of groups that must come before g + let mut deps: BTreeMap> = rec_groups + .keys() + .copied() + .map(|g| (g, BTreeSet::new())) + .collect(); + + for (&g, members) in rec_groups { + for &id in members { + let def = &self.type_defs[&id]; + if let Some(st) = def.supertype { + let st_group = self.type_defs[&st].rec_group; + if st_group != g { + deps.get_mut(&g).unwrap().insert(st_group); + } + } + } + } + + // indeg[g] = number of prerequisites + let mut indeg: BTreeMap = deps.keys().copied().map(|g| (g, 0)).collect(); + for (&g, ds) in &deps { + *indeg.get_mut(&g).unwrap() = ds.len(); + } + + // Prerequisite -> dependents + let mut users: BTreeMap> = BTreeMap::new(); + for (&g, ds) in &deps { + for &d in ds { + users.entry(d).or_default().push(g); + } + } + + // Kahn queue + let mut q = VecDeque::new(); + for (&g, &d) in &indeg { + if d == 0 { + q.push_back(g); + } + } + + let mut out = Vec::with_capacity(indeg.len()); + while let Some(g) = q.pop_front() { + out.push(g); + if let Some(us) = users.get(&g) { + for &u in us { + let e = indeg.get_mut(&u).unwrap(); + *e -= 1; + if *e == 0 { + q.push_back(u); + } + } + } + } + + debug_assert_eq!(out.len(), indeg.len(), "cycle in rec-group dependencies"); + out + } + + /// Topological sort of types by their supertype (supertype before subtype). 
+ pub fn sort_types_by_supertype(&self) -> Vec { + #[derive(Copy, Clone, Debug)] + enum Event { + Enter, + Exit, + } + + let mut stack: Vec<(Event, TypeId)> = self + .type_defs + .keys() + .copied() + .map(|id| (Event::Enter, id)) + .collect(); + + stack.reverse(); + + let mut sorted = Vec::with_capacity(self.type_defs.len()); + let mut seen = BTreeSet::::new(); + + while let Some((event, id)) = stack.pop() { + match event { + Event::Enter => { + if seen.insert(id) { + stack.push((Event::Exit, id)); + + if let Some(super_id) = self.type_defs[&id].supertype { + if !seen.contains(&super_id) { + stack.push((Event::Enter, super_id)); + } + } + } + } + Event::Exit => { + sorted.push(id); + } + } + } + sorted + } + /// Returns a fresh rec-group id that is not already in use. pub fn fresh_rec_group_id(&self, rng: &mut mutatis::Rng) -> RecGroupId { for _ in 0..1000 { @@ -79,20 +312,28 @@ impl Types { self.rec_groups.insert(id) } - /// Insert a rec-group id. - pub fn insert_empty_struct(&mut self, id: TypeId, group: RecGroupId) { + /// Insert an empty struct type with the given rec group, "is_final", and optional supertype. + pub fn insert_empty_struct( + &mut self, + id: TypeId, + group: RecGroupId, + is_final: bool, + supertype: Option, + ) { self.type_defs.insert( id, SubType { rec_group: group, + is_final, + supertype, composite_type: CompositeType::Struct(StructType::default()), }, ); } - /// Removes any entries beyond the given limit. + /// Fixup type-related inconsistencies. pub fn fixup(&mut self, limits: &GcOpsLimits) { - while self.rec_groups.len() > limits.max_rec_groups as usize { + while self.rec_groups.len() > usize::try_from(limits.max_rec_groups).unwrap() { self.rec_groups.pop_last(); } @@ -101,10 +342,50 @@ impl Types { .retain(|_, ty| self.rec_groups.contains(&ty.rec_group)); // Then enforce the max types limit. 
-        while self.type_defs.len() > limits.max_types as usize {
+        while self.type_defs.len() > usize::try_from(limits.max_types).unwrap() {
             self.type_defs.pop_last();
         }
 
+        // If supertype is gone, make the current type's supertype None.
+        let valid_type_ids: BTreeSet<TypeId> = self.type_defs.keys().copied().collect();
+        for def in self.type_defs.values_mut() {
+            if let Some(st) = def.supertype {
+                if !valid_type_ids.contains(&st) {
+                    def.supertype = None;
+                }
+            }
+        }
+
+        // A subtype cannot have a final supertype. Clear supertype when super is final.
+        let final_type_ids: BTreeSet<TypeId> = self
+            .type_defs
+            .iter()
+            .filter(|(_, d)| d.is_final)
+            .map(|(id, _)| *id)
+            .collect();
+        for def in self.type_defs.values_mut() {
+            if let Some(st) = def.supertype {
+                if final_type_ids.contains(&st) {
+                    def.supertype = None;
+                }
+            }
+        }
+
+        // Build rec_groups map for cycle detection and merging.
+        let mut rec_groups_map: BTreeMap<RecGroupId, Vec<TypeId>> = self
+            .rec_groups
+            .iter()
+            .copied()
+            .map(|g| (g, Vec::new()))
+            .collect();
+
+        for (id, ty) in self.types.type_defs.iter() {
+            rec_groups_map.entry(ty.rec_group).or_default().push(*id);
+        }
+
+        self.merge_rec_groups_via_scc(&rec_groups_map);
+        self.break_type_cycles_in_rec_groups();
+
         debug_assert!(
             self.type_defs
                 .values()

From 59c3287e1ceec73199bfd3af78a3411c155274ff Mon Sep 17 00:00:00 2001
From: Khagan Karimov
Date: Wed, 11 Feb 2026 21:47:37 -0700
Subject: [PATCH 2/6] Add rec_groups.is_empty() check to split_mutator

---
 crates/fuzzing/src/generators/gc_ops/mutator.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/crates/fuzzing/src/generators/gc_ops/mutator.rs b/crates/fuzzing/src/generators/gc_ops/mutator.rs
index e308bb7f1b1d..d84127bb0e3a 100644
--- a/crates/fuzzing/src/generators/gc_ops/mutator.rs
+++ b/crates/fuzzing/src/generators/gc_ops/mutator.rs
@@ -332,6 +332,7 @@ impl GcOpsMutator {
     // Define a mutation that splits a (rec ...) group in two, if possible.
fn split_rec_group(&mut self, c: &mut Candidates<'_>, ops: &mut GcOps) -> mutatis::Result<()> { if c.shrink() + || ops.types.rec_groups.is_empty() || ops.types.rec_groups.len() >= usize::try_from(ops.limits.max_rec_groups).unwrap() || ops.types.type_defs.len() < 2 { From b82667e7753f6796b584a3e0c359f0902b240b27 Mon Sep 17 00:00:00 2001 From: Khagan Karimov Date: Mon, 9 Mar 2026 17:12:53 -0600 Subject: [PATCH 3/6] Address feedback (1) --- crates/fuzzing/src/generators/gc_ops/mod.rs | 1 - crates/fuzzing/src/generators/gc_ops/ops.rs | 7 +- crates/fuzzing/src/generators/gc_ops/scc.rs | 212 --------- crates/fuzzing/src/generators/gc_ops/tests.rs | 173 +++++-- crates/fuzzing/src/generators/gc_ops/types.rs | 425 ++++++++++-------- 5 files changed, 384 insertions(+), 434 deletions(-) delete mode 100644 crates/fuzzing/src/generators/gc_ops/scc.rs diff --git a/crates/fuzzing/src/generators/gc_ops/mod.rs b/crates/fuzzing/src/generators/gc_ops/mod.rs index 5dfd8821b3a3..9a492cbee106 100644 --- a/crates/fuzzing/src/generators/gc_ops/mod.rs +++ b/crates/fuzzing/src/generators/gc_ops/mod.rs @@ -3,7 +3,6 @@ pub mod limits; pub mod mutator; pub mod ops; -pub mod scc; pub mod types; #[cfg(test)] diff --git a/crates/fuzzing/src/generators/gc_ops/ops.rs b/crates/fuzzing/src/generators/gc_ops/ops.rs index dccac72e5b35..e45ee90178cf 100644 --- a/crates/fuzzing/src/generators/gc_ops/ops.rs +++ b/crates/fuzzing/src/generators/gc_ops/ops.rs @@ -105,7 +105,8 @@ impl GcOps { let struct_type_base: u32 = types.len(); - let type_order: Vec = self.types.sort_types_by_supertype(); + let mut type_order: Vec = Vec::with_capacity(self.types.type_defs.len()); + self.types.sort_types_by_supertype(&mut type_order); // Build per-group member lists in `type_order` order (each group's list is a subsequence // of `type_order`, so within-group supertypes come before subtypes when possible). @@ -123,7 +124,9 @@ impl GcOps { } // Topological sort of rec-groups based on cross-group supertype edges. 
- let group_order: Vec = self.types.sort_rec_groups_topo(&rec_groups); + let mut group_order: Vec = Vec::with_capacity(rec_groups.len()); + self.types + .sort_rec_groups_topo(&mut group_order, &rec_groups); // Build the actual emission order and index map from it. let encoding_order: Vec = group_order diff --git a/crates/fuzzing/src/generators/gc_ops/scc.rs b/crates/fuzzing/src/generators/gc_ops/scc.rs deleted file mode 100644 index 2e605a1298f4..000000000000 --- a/crates/fuzzing/src/generators/gc_ops/scc.rs +++ /dev/null @@ -1,212 +0,0 @@ -//! Strongly-connected components (Tarjan, iterative). -//! -//! Same engineering as Wasmtime's inliner SCC: -//! - O(V+E) -//! - iterative (no recursion) -//! - components stored as `Vec>` into a flat node buffer -//! - deterministic via ordered containers (BTreeMap/BTreeSet) -//! This is a modified version of Wasmtime's inliner SCC. -//! Please see: https://github.com/bytecodealliance/wasmtime/blob/main/crates/wasmtime/src/compile/scc.rs - -use std::{ - collections::{BTreeMap, BTreeSet}, - ops::Range, -}; - -use crate::generators::gc_ops::types::RecGroupId; - -/// SCC results: `components` maps each SCC to a slice range in `component_nodes`. -pub struct StronglyConnectedComponents { - components: Vec>, - component_nodes: Vec, -} - -impl StronglyConnectedComponents { - /// Find SCCs in the given graph. - pub fn new(nodes: I, successors: F) -> Self - where - I: IntoIterator, - F: Fn(RecGroupId) -> S, - S: Iterator, - { - let nodes = nodes.into_iter(); - - // The resulting components and their nodes. - let mut component_nodes: Vec = vec![]; - let mut components: Vec> = vec![]; - - // The DFS index counter. - let mut index = NonMaxU32::default(); - - // DFS index and lowlink for each RecGroupId. - // Because RecGroupId is not dense, we use BTreeMap. - let mut indices: BTreeMap = BTreeMap::new(); - let mut lowlinks: BTreeMap = BTreeMap::new(); - - // SCC stack and membership. 
- let mut stack: Vec = vec![]; - let mut on_stack: BTreeSet = BTreeSet::new(); - - let mut dfs = Dfs::new(nodes); - while let Some(event) = dfs.next( - &successors, - // seen? - |node| indices.contains_key(&node), - ) { - match event { - DfsEvent::Pre(node) => { - debug_assert!(!indices.contains_key(&node)); - debug_assert!(!lowlinks.contains_key(&node)); - - indices.insert(node, index); - lowlinks.insert(node, index); - - index = NonMaxU32::new(index.get() + 1).unwrap(); - - stack.push(node); - let inserted = on_stack.insert(node); - debug_assert!(inserted); - } - - DfsEvent::AfterEdge(node, succ) => { - let node_idx = indices[&node]; - let node_low = lowlinks[&node]; - let succ_idx = indices[&succ]; - let succ_low = lowlinks[&succ]; - - debug_assert!(node_low <= node_idx); - debug_assert!(succ_low <= succ_idx); - - if on_stack.contains(&succ) { - let new_low = std::cmp::min(node_low, succ_low); - lowlinks.insert(node, new_low); - } - } - - DfsEvent::Post(node) => { - let node_idx = indices[&node]; - let node_low = lowlinks[&node]; - - if node_idx == node_low { - // Node is SCC root. Pop until node. - let start = u32::try_from(component_nodes.len()).unwrap(); - - loop { - let v = stack.pop().unwrap(); - let removed = on_stack.remove(&v); - debug_assert!(removed); - - component_nodes.push(v); - - if v == node { - break; - } - } - - let end = u32::try_from(component_nodes.len()).unwrap(); - components.push(start..end); - } - } - } - } - - Self { - components, - component_nodes, - } - } - - fn node_range(&self, range: Range) -> &[RecGroupId] { - let start = usize::try_from(range.start).unwrap(); - let end = usize::try_from(range.end).unwrap(); - &self.component_nodes[start..end] - } - - /// Iterate SCCs. - pub fn iter(&self) -> impl ExactSizeIterator + '_ { - self.components.iter().map(|r| self.node_range(r.clone())) - } -} - -/// An iterative depth-first traversal. 
-struct Dfs { - stack: Vec, -} - -impl Dfs { - fn new(roots: impl IntoIterator) -> Self { - Self { - stack: roots.into_iter().map(DfsEvent::Pre).collect(), - } - } - - fn next( - &mut self, - successors: impl Fn(RecGroupId) -> S, - seen: impl Fn(RecGroupId) -> bool, - ) -> Option - where - S: Iterator, - { - loop { - let event = self.stack.pop()?; - - if let DfsEvent::Pre(node) = event { - if seen(node) { - continue; - } - - let succs = successors(node); - let (min, max) = succs.size_hint(); - let est = max.unwrap_or_else(|| 2 * min); - - self.stack.reserve(2 * est + 1); - - self.stack.push(DfsEvent::Post(node)); - for succ in succs { - self.stack.push(DfsEvent::AfterEdge(node, succ)); - if !seen(succ) { - self.stack.push(DfsEvent::Pre(succ)); - } - } - } - - return Some(event); - } - } -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum DfsEvent { - Pre(RecGroupId), - AfterEdge(RecGroupId, RecGroupId), - Post(RecGroupId), -} - -mod non_max { - use std::num::NonZeroU32; - - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - pub struct NonMaxU32(NonZeroU32); - - impl Default for NonMaxU32 { - fn default() -> Self { - Self::new(0).unwrap() - } - } - - impl NonMaxU32 { - pub fn new(x: u32) -> Option { - if x == u32::MAX { - None - } else { - Some(Self(unsafe { NonZeroU32::new_unchecked(x + 1) })) - } - } - - pub fn get(&self) -> u32 { - self.0.get() - 1 - } - } -} -use non_max::NonMaxU32; diff --git a/crates/fuzzing/src/generators/gc_ops/tests.rs b/crates/fuzzing/src/generators/gc_ops/tests.rs index 503bcd55b55c..98b6e909957b 100644 --- a/crates/fuzzing/src/generators/gc_ops/tests.rs +++ b/crates/fuzzing/src/generators/gc_ops/tests.rs @@ -6,7 +6,6 @@ use crate::generators::gc_ops::{ use mutatis; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; -use std::collections::BTreeMap; use wasmparser; use wasmprinter; @@ -308,7 +307,8 @@ fn sort_types_by_supertype_orders_supertype_before_subtype() { types.insert_empty_struct(c, g, false, Some(a)); // C <: 
A types.insert_empty_struct(d, g, false, None); // D - let sorted = types.sort_types_by_supertype(); + let mut sorted = Vec::new(); + types.sort_types_by_supertype(&mut sorted); // D(3) root, B(1)<:D, A(0)<:B, C(2)<:A => supertype-before-subtype order. assert_eq!( @@ -319,8 +319,97 @@ fn sort_types_by_supertype_orders_supertype_before_subtype() { } #[test] -fn merge_rec_groups_via_scc_merges_three_cycle() { +fn fixup_preserves_subtyping_within_same_rec_group() { + let _ = env_logger::try_init(); + let mut types = Types::new(); + let g = RecGroupId(0); + types.insert_rec_group(g); + + let super_ty = TypeId(0); + let sub_ty = TypeId(1); + + // Both types are in the same rec group. + // The second subtypes the first. + types.insert_empty_struct(super_ty, g, false, None); + types.insert_empty_struct(sub_ty, g, false, Some(super_ty)); + + let limits = GcOpsLimits { + num_params: 0, + num_globals: 0, + table_size: 0, + max_rec_groups: 10, + max_types: 10, + }; + + types.fixup(&limits); + + assert_eq!(types.type_defs.get(&super_ty).unwrap().rec_group, g); + assert_eq!(types.type_defs.get(&sub_ty).unwrap().rec_group, g); + assert_eq!( + types.type_defs.get(&sub_ty).unwrap().supertype, + Some(super_ty) + ); +} + +#[test] +fn fixup_breaks_one_edge_in_multi_rec_group_type_cycle() { + let _ = env_logger::try_init(); + + let mut types = Types::new(); + + let g_a = RecGroupId(0); + let g_bc = RecGroupId(1); + let g_d = RecGroupId(2); + + types.insert_rec_group(g_a); + types.insert_rec_group(g_bc); + types.insert_rec_group(g_d); + + let a = TypeId(0); + let b = TypeId(1); + let c = TypeId(2); + let d = TypeId(3); + + // Rec(a) + types.insert_empty_struct(a, g_a, false, Some(d)); + + // Rec(b, c) + types.insert_empty_struct(b, g_bc, false, None); + types.insert_empty_struct(c, g_bc, false, Some(a)); + + // Rec(d) + types.insert_empty_struct(d, g_d, false, Some(c)); + + let limits = GcOpsLimits { + num_params: 0, + num_globals: 0, + table_size: 0, + max_rec_groups: 10, + max_types: 
10, + }; + + types.fixup(&limits); + + let a_super = types.type_defs.get(&a).unwrap().supertype; + let c_super = types.type_defs.get(&c).unwrap().supertype; + let d_super = types.type_defs.get(&d).unwrap().supertype; + + let cleared = [a_super, c_super, d_super] + .into_iter() + .filter(|st| st.is_none()) + .count(); + + assert!( + cleared == 1, + "fixup should clear exactly one edge to break the cycle" + ); +} + +#[test] +fn merge_rec_groups_via_scc_merges_group_cycle_without_type_cycle() { + let mut types = Types::new(); + let g0 = RecGroupId(0); let g1 = RecGroupId(1); let g2 = RecGroupId(2); @@ -329,40 +418,66 @@ fn merge_rec_groups_via_scc_merges_three_cycle() { types.insert_rec_group(g1); types.insert_rec_group(g2); - // One type per group: + let a0 = TypeId(0); + let a1 = TypeId(1); + let b0 = TypeId(2); + let b1 = TypeId(3); + let c0 = TypeId(4); + let c1 = TypeId(5); + + // g0 = {a0, a1} + // g1 = {b0, b1} + // g2 = {c0, c1} // - // t0 in g0, supertype = t1 (in g1) => g0 depends on g1 - // t1 in g1, supertype = t2 (in g2) => g1 depends on g2 - // t2 in g2, supertype = t0 (in g0) => g2 depends on g0 + // Cross-group subtype edges: + // a0 <: b0 => g0 -> g1 + // b1 <: c0 => g1 -> g2 + // c1 <: a1 => g2 -> g0 // - // This makes a 3-cycle among rec-groups: g0 -> g1 -> g2 -> g0 - // Merge should fuse all into one group (canonical keep = min = g0). - - types.insert_empty_struct(TypeId(0), g0, false, Some(TypeId(1))); - types.insert_empty_struct(TypeId(1), g1, false, Some(TypeId(2))); - types.insert_empty_struct(TypeId(2), g2, false, Some(TypeId(0))); - - // Build RecGroupId -> Vec member lists for merge input. 
- let mut rec_groups: BTreeMap> = types - .rec_groups - .iter() - .copied() - .map(|g| (g, Vec::new())) - .collect(); - for (id, def) in types.type_defs.iter() { - rec_groups.entry(def.rec_group).or_default().push(*id); - } + // This creates a cycle in the rec-group dependency graph: + // g0 -> g1 -> g2 -> g0 + // + // But the type graph itself is acyclic, because these are three separate + // subtype edges on different types: + // a0 -> b0 + // b1 -> c0 + // c1 -> a1 + // + // Therefore, breaking type cycles is not enough here. Merging rec-group + // SCCs is what resolves the cyclic dependency among rec groups. + + types.insert_empty_struct(a0, g0, false, Some(b0)); + types.insert_empty_struct(a1, g0, false, None); + + types.insert_empty_struct(b0, g1, false, None); + types.insert_empty_struct(b1, g1, false, Some(c0)); + + types.insert_empty_struct(c0, g2, false, None); + types.insert_empty_struct(c1, g2, false, Some(a1)); + + // There is no type cycle, so breaking supertype cycles should not change anything. + types.break_supertype_cycles(); + + assert_eq!(types.type_defs.get(&a0).unwrap().supertype, Some(b0)); + assert_eq!(types.type_defs.get(&b1).unwrap().supertype, Some(c0)); + assert_eq!(types.type_defs.get(&c1).unwrap().supertype, Some(a1)); assert_eq!(types.rec_groups.len(), 3); - types.merge_rec_groups_via_scc(&rec_groups); - // After merge: one group (g0), all three types in it. + types.merge_rec_group_sccs(); + + // After merge: one canonical group (g0), all types in it. 
assert_eq!(types.rec_groups.len(), 1); assert!(types.rec_groups.contains(&g0)); assert!(!types.rec_groups.contains(&g1)); assert!(!types.rec_groups.contains(&g2)); - assert_eq!(types.type_defs.get(&TypeId(0)).unwrap().rec_group, g0); - assert_eq!(types.type_defs.get(&TypeId(1)).unwrap().rec_group, g0); - assert_eq!(types.type_defs.get(&TypeId(2)).unwrap().rec_group, g0); + for ty in [a0, a1, b0, b1, c0, c1] { + assert_eq!(types.type_defs.get(&ty).unwrap().rec_group, g0); + } + + // And importantly, the valid supertype edges should still be preserved. + assert_eq!(types.type_defs.get(&a0).unwrap().supertype, Some(b0)); + assert_eq!(types.type_defs.get(&b1).unwrap().supertype, Some(c0)); + assert_eq!(types.type_defs.get(&c1).unwrap().supertype, Some(a1)); } diff --git a/crates/fuzzing/src/generators/gc_ops/types.rs b/crates/fuzzing/src/generators/gc_ops/types.rs index 29b77058df94..c0484355754b 100644 --- a/crates/fuzzing/src/generators/gc_ops/types.rs +++ b/crates/fuzzing/src/generators/gc_ops/types.rs @@ -2,9 +2,10 @@ use crate::generators::gc_ops::limits::GcOpsLimits; use crate::generators::gc_ops::ops::GcOp; -use crate::generators::gc_ops::scc::StronglyConnectedComponents; +use cranelift_entity::{PrimaryMap, SecondaryMap}; use serde::{Deserialize, Serialize}; -use std::collections::{BTreeMap, BTreeSet, VecDeque}; +use std::collections::{BTreeMap, BTreeSet}; +use wasmtime_environ::graphs::{Dfs, DfsEvent, Graph, StronglyConnectedComponents}; /// RecGroup ID struct definition. #[derive( @@ -46,6 +47,104 @@ pub struct Types { pub(crate) type_defs: BTreeMap, } +/// Supertype graph definition. +struct SupertypeGraph<'a> { + type_defs: &'a BTreeMap, +} + +/// Rec-group graph definition. 
+struct RecGroupGraph<'a> { + type_defs: &'a BTreeMap, + rec_groups: &'a BTreeMap>, +} + +impl Graph for RecGroupGraph<'_> { + type NodesIter<'a> + = std::iter::Copied>> + where + Self: 'a; + + fn nodes(&self) -> Self::NodesIter<'_> { + self.rec_groups.keys().copied() + } + + type SuccessorsIter<'a> + = std::vec::IntoIter + where + Self: 'a; + + fn successors(&self, group: RecGroupId) -> Self::SuccessorsIter<'_> { + let mut deps = BTreeSet::new(); + + if let Some(type_ids) = self.rec_groups.get(&group) { + for &ty in type_ids { + if let Some(super_ty) = self.type_defs[&ty].supertype { + let super_group = self.type_defs[&super_ty].rec_group; + if super_group != group { + deps.insert(super_group); + } + } + } + } + + deps.into_iter().collect::>().into_iter() + } +} + +impl Graph for SupertypeGraph<'_> { + type NodesIter<'a> + = std::iter::Copied> + where + Self: 'a; + + fn nodes(&self) -> Self::NodesIter<'_> { + self.type_defs.keys().copied() + } + + type SuccessorsIter<'a> + = std::option::IntoIter + where + Self: 'a; + + fn successors(&self, node: TypeId) -> Self::SuccessorsIter<'_> { + self.type_defs + .get(&node) + .and_then(|def| def.supertype) + .into_iter() + } +} + +/// Dense rec-group ID struct definition. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct DenseGroupId(u32); +wasmtime_environ::entity_impl!(DenseGroupId); + +/// Dense rec-group graph definition. +#[derive(Debug, Default)] +struct DenseRecGroupGraph { + edges: SecondaryMap>, +} + +impl Graph for DenseRecGroupGraph { + type NodesIter<'a> + = wasmtime_environ::Keys + where + Self: 'a; + + fn nodes(&self) -> Self::NodesIter<'_> { + self.edges.keys() + } + + type SuccessorsIter<'a> + = core::iter::Copied> + where + Self: 'a; + + fn successors(&self, node: DenseGroupId) -> Self::SuccessorsIter<'_> { + self.edges[node].iter().copied() + } +} + impl Types { /// Create a fresh `Types` allocator with no recursive groups defined yet. 
pub fn new() -> Self { @@ -55,234 +154,192 @@ impl Types { } } - /// Break cycles in supertype edges within each rec-group by dropping some edges. - pub fn break_type_cycles_in_rec_groups(&mut self) { - // Kill self-edges to avoid cycles. - for (id, def) in self.type_defs.iter_mut() { - if def.supertype == Some(*id) { - def.supertype = None; - } - } + /// Break cycles in the type -> supertype graph by dropping some supertype edges. + pub fn break_supertype_cycles(&mut self) { + let graph = SupertypeGraph { + type_defs: &self.type_defs, + }; - // Build group -> member list from current truth. - let mut members: BTreeMap> = BTreeMap::new(); - for (id, def) in self.type_defs.iter() { - members.entry(def.rec_group).or_default().push(*id); - } - - // For each group, break cycles in the TypeId supertype graph. - for (_g, ids) in members.iter() { - if ids.len() <= 1 { - continue; - } + let mut dfs = Dfs::new(graph.nodes()); + let mut seen = BTreeSet::new(); + let mut active = BTreeSet::new(); + let mut to_clear = BTreeSet::new(); - let id_set: BTreeSet = ids.iter().copied().collect(); - - // DFS from each node, if we revisit a node in the - // current path, we found a cycle. Break it by clearing supertype. - let mut visited = BTreeSet::new(); - for &start in ids { - if visited.contains(&start) { - continue; + while let Some(event) = dfs.next(&graph, |id| seen.contains(&id)) { + match event { + DfsEvent::Pre(id) => { + seen.insert(id); + active.insert(id); } + DfsEvent::Post(id) => { + active.remove(&id); + } + DfsEvent::AfterEdge(from, to) => { + if active.contains(&to) { + to_clear.insert(from); + } + } + } + } - let mut path = Vec::new(); - let mut path_set = BTreeSet::new(); - let mut cur = start; + for id in to_clear { + if let Some(def) = self.type_defs.get_mut(&id) { + def.supertype = None; + } + } + } - loop { - if path_set.contains(&cur) { - // Found a cycle. Clear supertype to break it. 
- if let Some(def) = self.type_defs.get_mut(&cur) { - def.supertype = None; - } - break; - } + /// Topological sort of rec-groups in place. + pub fn sort_rec_groups_topo( + &self, + groups: &mut Vec, + rec_groups: &BTreeMap>, + ) { + let graph = RecGroupGraph { + type_defs: &self.type_defs, + rec_groups, + }; - if visited.contains(&cur) { - break; - } + let mut dfs = Dfs::new(graph.nodes()); + let mut seen = BTreeSet::new(); + let mut active = BTreeSet::new(); - path.push(cur); - path_set.insert(cur); - visited.insert(cur); + groups.clear(); + groups.reserve(rec_groups.len()); - let next = self.type_defs.get(&cur).and_then(|d| d.supertype); - match next { - Some(st) if id_set.contains(&st) => cur = st, - _ => break, - } + while let Some(event) = dfs.next(&graph, |id| seen.contains(&id)) { + match event { + DfsEvent::Pre(id) => { + seen.insert(id); + active.insert(id); + } + DfsEvent::Post(id) => { + active.remove(&id); + groups.push(id); + } + DfsEvent::AfterEdge(from, to) => { + debug_assert!( + !active.contains(&to), + "cycle in rec-group dependency graph: {:?} -> {:?}", + from, + to + ); } } } } - /// Get the successors of the given rec-group. - /// It is used to find the SCCs. - fn rec_group_successors<'a>( - &'a self, - rec_groups: &'a BTreeMap>, - g: RecGroupId, - ) -> impl Iterator + 'a { - let mut deps = BTreeSet::::new(); - - for &ty in &rec_groups[&g] { - if let Some(st) = self.type_defs[&ty].supertype { - let h = self.type_defs[&st].rec_group; - if h != g { - deps.insert(h); + /// Topological sort of types by their supertype (supertype before subtype) in place. 
+ pub fn sort_types_by_supertype(&self, out: &mut Vec) { + let graph = SupertypeGraph { + type_defs: &self.type_defs, + }; + + let mut dfs = Dfs::new(graph.nodes()); + let mut seen = BTreeSet::new(); + + out.clear(); + out.reserve(self.type_defs.len()); + + while let Some(event) = dfs.next(&graph, |id| seen.contains(&id)) { + match event { + DfsEvent::Pre(id) => { + seen.insert(id); } + DfsEvent::Post(id) => { + out.push(id); + } + DfsEvent::AfterEdge(_, _) => {} } } - - deps.into_iter() } /// Merge rec-groups that participate in dependency cycles. - pub fn merge_rec_groups_via_scc(&mut self, rec_groups: &BTreeMap>) { - let nodes = rec_groups.keys().copied(); - let sccs = - StronglyConnectedComponents::new(nodes, |g| self.rec_group_successors(rec_groups, g)); + pub fn merge_rec_group_sccs(&mut self) { + let mut rec_groups: BTreeMap> = self + .rec_groups + .iter() + .copied() + .map(|g| (g, Vec::new())) + .collect(); + + for (&id, def) in &self.type_defs { + rec_groups.entry(def.rec_group).or_default().push(id); + } - for groups in sccs.iter() { + let sccs = self.rec_group_sccs(&rec_groups); + + for groups in sccs { if groups.len() <= 1 { continue; } - // Deterministic canonical "keep" group. - // Smallest RecGroupId in the SCC. let keep = *groups.iter().min().unwrap(); - // Merge every other group into "keep" group by rewriting only the members of that group. - for &g in groups { - if g == keep { + for &group in &groups { + if group == keep { continue; } - if let Some(members) = rec_groups.get(&g) { - for &ty in members { + if let Some(type_ids) = rec_groups.get(&group) { + for &ty in type_ids { if let Some(def) = self.type_defs.get_mut(&ty) { def.rec_group = keep; } } } - // Drop g from the rec-group set. 
- self.rec_groups.remove(&g); + self.rec_groups.remove(&group); } } - - debug_assert!( - self.type_defs - .values() - .all(|d| self.rec_groups.contains(&d.rec_group)), - "after rec-group merge, some type_defs still reference removed rec-groups" - ); } - /// Topological sort of rec-groups. - pub fn sort_rec_groups_topo( + /// Find strongly-connected components in the rec-group dependency graph. + fn rec_group_sccs( &self, rec_groups: &BTreeMap>, - ) -> Vec { - // deps[g] = set of groups that must come before g - let mut deps: BTreeMap> = rec_groups - .keys() - .copied() - .map(|g| (g, BTreeSet::new())) - .collect(); + ) -> Vec> { + let mut dense_to_group = PrimaryMap::::new(); + let mut group_to_dense = BTreeMap::::new(); - for (&g, members) in rec_groups { - for &id in members { - let def = &self.type_defs[&id]; - if let Some(st) = def.supertype { - let st_group = self.type_defs[&st].rec_group; - if st_group != g { - deps.get_mut(&g).unwrap().insert(st_group); - } - } - } + for &group in rec_groups.keys() { + let dense = dense_to_group.push(group); + group_to_dense.insert(group, dense); } - // indeg[g] = number of prerequisites - let mut indeg: BTreeMap = deps.keys().copied().map(|g| (g, 0)).collect(); - for (&g, ds) in &deps { - *indeg.get_mut(&g).unwrap() = ds.len(); - } + let mut graph = DenseRecGroupGraph::default(); - // Prerequisite -> dependents - let mut users: BTreeMap> = BTreeMap::new(); - for (&g, ds) in &deps { - for &d in ds { - users.entry(d).or_default().push(g); - } + for dense in dense_to_group.keys() { + let _ = &graph.edges[dense]; } - // Kahn queue - let mut q = VecDeque::new(); - for (&g, &d) in &indeg { - if d == 0 { - q.push_back(g); - } - } + for (&group, type_ids) in rec_groups { + let from = group_to_dense[&group]; + let mut succs = BTreeSet::new(); - let mut out = Vec::with_capacity(indeg.len()); - while let Some(g) = q.pop_front() { - out.push(g); - if let Some(us) = users.get(&g) { - for &u in us { - let e = indeg.get_mut(&u).unwrap(); 
- *e -= 1; - if *e == 0 { - q.push_back(u); + for &ty in type_ids { + if let Some(super_ty) = self.type_defs[&ty].supertype { + let super_group = self.type_defs[&super_ty].rec_group; + if super_group != group { + succs.insert(group_to_dense[&super_group]); } } } - } - - debug_assert_eq!(out.len(), indeg.len(), "cycle in rec-group dependencies"); - out - } - /// Topological sort of types by their supertype (supertype before subtype). - pub fn sort_types_by_supertype(&self) -> Vec { - #[derive(Copy, Clone, Debug)] - enum Event { - Enter, - Exit, + graph.edges[from].extend(succs.into_iter()); } - let mut stack: Vec<(Event, TypeId)> = self - .type_defs - .keys() - .copied() - .map(|id| (Event::Enter, id)) - .collect(); - - stack.reverse(); - - let mut sorted = Vec::with_capacity(self.type_defs.len()); - let mut seen = BTreeSet::::new(); - - while let Some((event, id)) = stack.pop() { - match event { - Event::Enter => { - if seen.insert(id) { - stack.push((Event::Exit, id)); - - if let Some(super_id) = self.type_defs[&id].supertype { - if !seen.contains(&super_id) { - stack.push((Event::Enter, super_id)); - } - } - } - } - Event::Exit => { - sorted.push(id); - } - } - } - sorted + let sccs = StronglyConnectedComponents::new(&graph); + + sccs.iter() + .map(|(_, nodes)| { + nodes + .iter() + .copied() + .map(|dense| dense_to_group[dense]) + .collect::>() + }) + .collect() } /// Returns a fresh rec-group id that is not already in use. @@ -371,20 +428,8 @@ impl Types { } } - // Build rec_groups map for cycle detection and merging. 
- let mut rec_groups_map: BTreeMap> = self - .rec_groups - .iter() - .copied() - .map(|g| (g, Vec::new())) - .collect(); - - for (id, ty) in self.type_defs.iter() { - rec_groups_map.entry(ty.rec_group).or_default().push(*id); - } - - self.merge_rec_groups_via_scc(&rec_groups_map); - self.break_type_cycles_in_rec_groups(); + self.break_supertype_cycles(); + self.merge_rec_group_sccs(); debug_assert!( self.type_defs From f4fa0665c6d20a8512dc91297c364efb1f9583a5 Mon Sep 17 00:00:00 2001 From: Khagan Karimov Date: Mon, 9 Mar 2026 17:20:36 -0600 Subject: [PATCH 4/6] Address clippy warning --- crates/fuzzing/src/generators/gc_ops/types.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/fuzzing/src/generators/gc_ops/types.rs b/crates/fuzzing/src/generators/gc_ops/types.rs index c0484355754b..11aa1d91c553 100644 --- a/crates/fuzzing/src/generators/gc_ops/types.rs +++ b/crates/fuzzing/src/generators/gc_ops/types.rs @@ -220,9 +220,7 @@ impl Types { DfsEvent::AfterEdge(from, to) => { debug_assert!( !active.contains(&to), - "cycle in rec-group dependency graph: {:?} -> {:?}", - from, - to + "cycle in rec-group dependency graph: {from:?} -> {to:?}" ); } } From 4b696635aa1c29a8af1dbe87eee91a12c69c5669 Mon Sep 17 00:00:00 2001 From: Khagan Karimov Date: Mon, 9 Mar 2026 17:30:33 -0600 Subject: [PATCH 5/6] Add forgotten Cargo.toml --- crates/fuzzing/Cargo.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/fuzzing/Cargo.toml b/crates/fuzzing/Cargo.toml index 240985681687..e68f70f6a9b7 100644 --- a/crates/fuzzing/Cargo.toml +++ b/crates/fuzzing/Cargo.toml @@ -15,6 +15,8 @@ workspace = true wasmtime-test-util = { workspace = true, features = ['wast'] } [dependencies] +cranelift-entity = { workspace = true } +wasmtime-environ = { workspace = true } backtrace = { workspace = true } arbitrary = { workspace = true, features = ["derive"] } env_logger = { workspace = true } From cabff0715f4168888df7745a27ff83df86368f77 Mon Sep 17 00:00:00 2001 From: 
Khagan Karimov Date: Mon, 9 Mar 2026 17:39:14 -0600 Subject: [PATCH 6/6] Update Cargo.lock --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 6b7caf5db7f7..aa4b000082e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4793,6 +4793,7 @@ dependencies = [ "backtrace", "cranelift-bforest", "cranelift-bitset", + "cranelift-entity", "env_logger 0.11.5", "futures", "indexmap 2.13.0",