From 3a33563fb0b8fcc84357e00e2e1122326d403b2b Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Fri, 3 Jan 2025 11:49:35 -0500 Subject: [PATCH 1/9] wip --- src/algorithms/community/leiden.rs | 208 +++++++++++++++++++++++ src/algorithms/community/louvain.rs | 74 ++------- src/algorithms/community/mod.rs | 6 + src/algorithms/community/partitions.rs | 218 ++++++++++++++++++++++++- src/algorithms/community/utility.rs | 29 ++++ src/ext/hashset.rs | 18 ++ src/ext/iterator.rs | 43 +++++ src/graph/degree.rs | 66 +++++++- src/graph/query.rs | 43 ++++- src/lib.rs | 1 + 10 files changed, 636 insertions(+), 70 deletions(-) create mode 100644 src/algorithms/community/leiden.rs create mode 100644 src/algorithms/community/utility.rs diff --git a/src/algorithms/community/leiden.rs b/src/algorithms/community/leiden.rs new file mode 100644 index 0000000..5a82bc5 --- /dev/null +++ b/src/algorithms/community/leiden.rs @@ -0,0 +1,208 @@ +use crate::{ + algorithms::community::partitions, algorithms::community::utility, ext::hashset::IntSetExt, + AdjacentNode, Edge, EdgeDedupeStrategy, Error, ErrorKind, Graph, GraphSpecs, Node, +}; +use nohash::IntSet; +use std::collections::{HashSet, VecDeque}; +use std::fmt::Display; +use std::hash::Hash; + +struct Partition { + pub node_partition: Vec, + pub partition: Vec>, + pub degree_sums: Vec, +} + +impl Partition { + pub fn node_community(&self, node: usize) -> &IntSet { + &self.partition[self.node_partition[node]] + } + pub fn degree_sum(&self, node: usize) -> f64 { + self.degree_sums[self.node_partition[node]] + } +} + +pub fn leiden( + graph: &Graph, + weighted: bool, + resolution: Option, +) -> Result>, Error> +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let _resolution = resolution.unwrap_or(0.05); + let partition = get_singleton_partition(graph, weighted); + // Ok(partitions::convert_usize_partitions_to_t(partition, &graph)) + let mut prev_partition: Option = None; + loop { + let new_partition = move_nodes_fast(graph, &partition, weighted, _resolution); + if partitions::partition_is_singleton(&new_partition.partition, graph.number_of_nodes()) + || (prev_partition.is_some() + && partitions::partitions_eq( + &new_partition.partition, + &prev_partition.unwrap().partition, + )) + { + return Ok(partitions::convert_usize_partitions_to_t( + new_partition.partition, + &graph, + )); + } + prev_partition = Some(new_partition); + } +} + +fn move_nodes_fast( + graph: &Graph, + partition: &Partition, + weighted: bool, + resolution: f64, +) -> Partition +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut shuffled_indexes: VecDeque = + utility::get_shuffled_node_indexes(graph, None).into(); + while let Some(v) = shuffled_indexes.pop_front() { + let adjacent_communities = get_adjacent_communities(v, graph, &partition); + let (max_community, max_delta) = argmax( + v, + &adjacent_communities, + partition, + graph, + weighted, + resolution, + ); + // argmax + } + Partition { + partition: partition.partition.clone(), + node_partition: partition.node_partition.clone(), + degree_sums: partition.degree_sums.clone(), + } +} + +// fn argmax() + +fn get_singleton_partition(graph: &Graph, weighted: bool) -> Partition +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let partition = partitions::get_singleton_partition(graph); + let node_partition: Vec = (0..graph.number_of_nodes()).collect(); + + let degree_sums: Vec = match weighted { + false => graph + .get_degree_for_all_node_indexes() + .into_iter() + .map(|x| x as f64) + .collect(), + true => graph.get_weighted_degree_for_all_node_indexes(), + }; + Partition { + partition, + node_partition, + degree_sums, + } +} + +fn get_adjacent_communities( + node: usize, + graph: &Graph, + partition: &Partition, +) -> IntSet +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut adjacent_communities = IntSet::default(); + adjacent_communities.insert(partition.node_partition[node]); + for u in graph.get_successor_nodes_by_index(&node) { + adjacent_communities.insert(partition.node_partition[u.node_index]); + } + adjacent_communities +} + +fn argmax( + node: usize, + communities: &IntSet, + partition: &Partition, + graph: &Graph, + weighted: bool, + resolution: f64, +) -> (usize, f64) +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut max_community = partition.node_partition[node]; + let mut max_delta = 0.0; + for community_index in communities.into_iter() { + let community = &partition.partition[*community_index]; + let delta = get_delta(node, partition, community, graph, weighted, resolution); + if delta > max_delta { + max_delta = delta; + max_community = *community_index; + } + } + (max_community, max_delta) +} + +fn get_delta( + v: usize, + partition: &Partition, + target: &IntSet, + graph: &Graph, + weighted: bool, + resolution: f64, +) -> f64 +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + if target.contains(&v) { + return 0.0; + } + let m = graph.size(weighted); + let source_community = partition.node_community(v); + let diff_source = + single_node_neighbor_cut_size(graph, v, &source_community.without(&v), weighted); + let diff_target = single_node_neighbor_cut_size(graph, v, &target, weighted); + let deg_v = match weighted { + true => graph.get_node_weighted_degree_by_index(v), + false => graph.get_node_degree_by_index(v) as f64, + }; + let degs_source = partition.degree_sum(v); + let degs_target = match target.len() == 0 { + true => 0.0, + false => partition.degree_sum(*target.into_iter().next().unwrap()), + }; + + ((diff_target - diff_source) + - resolution / (2.0 * m) * (deg_v.powf(2.0) + deg_v * (degs_target - degs_source))) + / m +} + +fn single_node_neighbor_cut_size( + graph: &Graph, + v: usize, + community: &IntSet, + weighted: bool, +) -> f64 +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + graph + .get_successor_nodes_by_index(&v) + .into_iter() + .filter(|x| community.contains(&x.node_index)) + .map(|x| match weighted { + true => x.weight, + false => 1.0, + }) + .sum() +} diff --git a/src/algorithms/community/louvain.rs b/src/algorithms/community/louvain.rs index a891240..c5b2879 100644 --- a/src/algorithms/community/louvain.rs +++ b/src/algorithms/community/louvain.rs @@ -1,15 +1,11 @@ use crate::{ - algorithms::community::partitions, Edge, EdgeDedupeStrategy, Error, ErrorKind, Graph, - GraphSpecs, Node, + algorithms::community::partitions, algorithms::community::utility::get_shuffled_node_indexes, + Edge, EdgeDedupeStrategy, Error, ErrorKind, Graph, GraphSpecs, Node, }; use nohash::{IntMap, IntSet}; -use rand::prelude::*; -use rand::seq::SliceRandom; -use rand::thread_rng; -use std::collections::HashSet; -use std::fmt::Display; use std::hash::Hash; use std::sync::Arc; +use std::{collections::HashSet, fmt::Display}; /** Returns the best partition of a graph, using the Louvain algorithm. @@ -83,14 +79,7 @@ where A: Clone + Send + Sync, { let _threshold = threshold.unwrap_or(0.0000001); - let partition: Vec> = (0..graph.number_of_nodes()) - .into_iter() - .map(|i| { - let mut set = IntSet::default(); - set.insert(i); - set - }) - .collect(); + let partition = partitions::get_singleton_partition(graph); let mut modularity = partitions::modularity_by_indexes(&graph, &partition, weighted, resolution).unwrap(); let m = graph.size(weighted); @@ -105,7 +94,9 @@ where partitions::modularity_by_indexes(&graph_com, &inner_partition, weighted, resolution) .unwrap(); if new_mod - modularity <= _threshold { - return Ok(convert_usize_partitons_to_t(partitions, &graph)); + return Ok(partitions::convert_usize_partitions_vec_to_t( + partitions, &graph, + )); } modularity = new_mod; graph_com = generate_graph(&graph_com, inner_partition); @@ -114,31 +105,9 @@ where inner_partition = z.1; improvement = z.2; } - Ok(convert_usize_partitons_to_t(partitions, &graph)) -} - -/// Converts a graph partition of usize replacements of node names T to -/// a partition using the node names T. -fn convert_usize_partitons_to_t( - partition: Vec>>, - graph: &Graph, -) -> Vec>> -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ - partition - .into_iter() - .map(|v| { - v.into_iter() - .map(|hs| { - hs.into_iter() - .map(|u| graph.get_node_by_index(&u).unwrap().name.clone()) - .collect::>() - }) - .collect::>>() - }) - .collect() + Ok(partitions::convert_usize_partitions_vec_to_t( + partitions, &graph, + )) } /// Calculate one level of the Louvain partitions tree. @@ -205,29 +174,6 @@ fn compute_one_level( (new_partition, new_inner_partition, improvement) } -/// Returns a random number generator (RNG), optionally seeded. -fn get_rng(seed: Option) -> StdRng { - match seed { - None => { - let mut trng = thread_rng(); - StdRng::seed_from_u64(trng.next_u64()) - } - Some(s) => StdRng::seed_from_u64(s), - } -} - -/// Returns all the node indexes in `graph`, shuffled randomly. -fn get_shuffled_node_indexes(graph: &Graph, seed: Option) -> Vec -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ - let mut rng = get_rng(seed); - let mut indexes: Vec = (0..graph.number_of_nodes()).collect(); - indexes.shuffle(&mut rng); - indexes -} - #[inline] fn add_degree_to_best_com(best_com: usize, deg_info: &mut DegreeInfo, directed: bool) { match directed { diff --git a/src/algorithms/community/mod.rs b/src/algorithms/community/mod.rs index b2af94d..5200c70 100644 --- a/src/algorithms/community/mod.rs +++ b/src/algorithms/community/mod.rs @@ -1,5 +1,11 @@ +// Find communities using the Leiden community detection algorithm. +pub mod leiden; + // Find communities using the Louvain community detection algorithm. pub mod louvain; // Measure the quality of community partitions. pub mod partitions; + +// Utility functions for community detection algorithms. +pub(crate) mod utility; diff --git a/src/algorithms/community/partitions.rs b/src/algorithms/community/partitions.rs index f0b515c..b3c514c 100644 --- a/src/algorithms/community/partitions.rs +++ b/src/algorithms/community/partitions.rs @@ -63,6 +63,22 @@ where node_indexes_count == num_nodes } +pub(crate) fn get_singleton_partition(graph: &Graph) -> Vec> +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone, +{ + let partition: Vec> = (0..graph.number_of_nodes()) + .into_iter() + .map(|i| { + let mut set = IntSet::default(); + set.insert(i); + set + }) + .collect(); + partition +} + /** Compute the modularity of the given graph partitions. @@ -115,8 +131,8 @@ where graph.get_weighted_in_degree_for_all_nodes().unwrap(), ), false => ( - convert_values_to_f64::(graph.get_out_degree_for_all_nodes().unwrap()), - convert_values_to_f64::(graph.get_in_degree_for_all_nodes().unwrap()), + convert_values_to_f64::(graph.get_out_degree_for_all_nodes().unwrap()), + convert_values_to_f64::(graph.get_in_degree_for_all_nodes().unwrap()), ), }; let m: f64 = outd.values().sum(); @@ -126,7 +142,7 @@ where false => { let deg = match weighted { true => graph.get_weighted_degree_for_all_nodes(), - false => convert_values_to_f64::(graph.get_degree_for_all_nodes()), + false => convert_values_to_f64::(graph.get_degree_for_all_nodes()), }; let deg_sum: f64 = deg.values().sum(); let m = deg_sum / 2.0; @@ -216,7 +232,67 @@ where Ok(communities.iter().map(community_contribution).sum()) } -fn convert_values_to_f64(hashmap: HashMap) -> HashMap +pub(crate) fn partition_is_singleton(partition: &[IntSet], num_nodes: usize) -> bool { + let len = partition.len(); + let flattened_len = partition.into_iter().flatten().count(); + flattened_len == len && len == num_nodes +} + +pub(crate) fn partitions_eq( + partition1: &Vec>, + partition2: &Vec>, +) -> bool { + let first_of_each_set1: Vec<&usize> = partition1 + .iter() + .map(|hs| hs.iter().next().unwrap()) + .collect(); + let matching_partition2_indexes: Vec = first_of_each_set1 + .iter() + .map(|i| partition2.iter().position(|hs| hs.contains(i)).unwrap()) + .collect(); + partition1 + .into_iter() + .zip(matching_partition2_indexes) + .all(|(hs1, i)| hs1 == &partition2[i]) +} + +/// Converts a graph partition of usize replacements of node names T to +/// a partition using the node names T. +pub(crate) fn convert_usize_partitions_vec_to_t( + partitions_vec: Vec>>, + graph: &Graph, +) -> Vec>> +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + partitions_vec + .into_iter() + .map(|v| convert_usize_partitions_to_t(v, graph)) + .collect() +} + +/// Converts a graph partition of usize replacements of node names T to +/// a partition using the node names T. +pub(crate) fn convert_usize_partitions_to_t( + partitions: Vec>, + graph: &Graph, +) -> Vec> +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + partitions + .into_iter() + .map(|hs| { + hs.into_iter() + .map(|u| graph.get_node_by_index(&u).unwrap().name.clone()) + .collect::>() + }) + .collect::>>() +} + +fn convert_values_to_f64(hashmap: HashMap) -> HashMap where T: Eq + Hash, { @@ -226,3 +302,137 @@ where fn convert_values_to_f64_vec(values: Vec) -> Vec { values.into_iter().map(|v| v as f64).collect() } + +#[cfg(test)] +mod tests { + + use super::*; + use crate::{Edge, Graph, GraphSpecs}; + use std::collections::HashMap; + + #[test] + fn test_convert_values_to_f64() { + let hashmap: HashMap<&str, usize> = vec![("a", 1), ("b", 2), ("c", 3)] + .into_iter() + .collect::>(); + let f64_hashmap = convert_values_to_f64(hashmap); + assert_eq!(f64_hashmap.get("a").unwrap(), &1.0); + assert_eq!(f64_hashmap.get("b").unwrap(), &2.0); + assert_eq!(f64_hashmap.get("c").unwrap(), &3.0); + } + + #[test] + fn test_convert_values_to_f64_vec() { + let values = vec![1, 2, 3]; + let f64_vec = convert_values_to_f64_vec(values); + assert_eq!(f64_vec, vec![1.0, 2.0, 3.0]); + } + + #[test] + fn test_convert_usize_partitions_to_t() { + let edges = vec![ + Edge::new("n1", "n2"), + Edge::new("n3", "n4"), + Edge::new("n5", "n6"), + ]; + let graph: Graph<&str, ()> = + Graph::new_from_nodes_and_edges(vec![], edges, GraphSpecs::undirected_create_missing()) + .unwrap(); + let partitions = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + let converted = convert_usize_partitions_to_t(partitions, &graph); + let hs1: HashSet<&str> = vec!["n1", "n2"].into_iter().collect(); + let hs2: HashSet<&str> = vec!["n3", "n4"].into_iter().collect(); + let hs3: HashSet<&str> = vec!["n5", "n6"].into_iter().collect(); + assert_eq!(converted[0], hs1); + assert_eq!(converted[1], hs2); + assert_eq!(converted[2], hs3); + } + + #[test] + fn test_convert_usize_partitions_vec_to_t() { + let edges = vec![ + Edge::new("n1", "n2"), + Edge::new("n3", "n4"), + Edge::new("n5", "n6"), + ]; + let graph: Graph<&str, ()> = + Graph::new_from_nodes_and_edges(vec![], edges, GraphSpecs::undirected_create_missing()) + .unwrap(); + let partitions = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + let converted = convert_usize_partitions_vec_to_t(vec![partitions], &graph); + let hs1: HashSet<&str> = vec!["n1", "n2"].into_iter().collect(); + let hs2: HashSet<&str> = vec!["n3", "n4"].into_iter().collect(); + let hs3: HashSet<&str> = vec!["n5", "n6"].into_iter().collect(); + assert_eq!(converted[0][0], hs1); + assert_eq!(converted[0][1], hs2); + assert_eq!(converted[0][2], hs3); + } + + #[test] + fn test_partition_is_singleton() { + let partition = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + assert!(!partition_is_singleton(&partition, 6)); + let partition = vec![ + vec![0].into_iter().collect(), + vec![1].into_iter().collect(), + vec![2].into_iter().collect(), + ]; + assert!(partition_is_singleton(&partition, 3)); + } + + #[test] + fn test_partitions_eq1() { + let partition1 = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + let partition2 = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + assert!(partitions_eq(&partition1, &partition2)); + } + + #[test] + fn test_partitions_eq2() { + let partition1 = vec![ + vec![2, 3].into_iter().collect(), + vec![0, 1].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + let partition2 = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + assert!(partitions_eq(&partition1, &partition2)); + } + + #[test] + fn test_partitions_eq3() { + let partition1 = vec![ + vec![0, 1, 2].into_iter().collect(), + vec![3, 4, 5].into_iter().collect(), + ]; + let partition2 = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ]; + assert!(!partitions_eq(&partition1, &partition2)); + } +} diff --git a/src/algorithms/community/utility.rs b/src/algorithms/community/utility.rs new file mode 100644 index 0000000..5d34d38 --- /dev/null +++ b/src/algorithms/community/utility.rs @@ -0,0 +1,29 @@ +use crate::Graph; +use rand::prelude::*; +use rand::seq::SliceRandom; +use rand::thread_rng; +use std::fmt::Display; +use std::hash::Hash; + +/// Returns all the node indexes in `graph`, shuffled randomly. +pub(crate) fn get_shuffled_node_indexes(graph: &Graph, seed: Option) -> Vec +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut rng = get_rng(seed); + let mut indexes: Vec = (0..graph.number_of_nodes()).collect(); + indexes.shuffle(&mut rng); + indexes +} + +/// Returns a random number generator (RNG), optionally seeded. +fn get_rng(seed: Option) -> StdRng { + match seed { + None => { + let mut trng = thread_rng(); + StdRng::seed_from_u64(trng.next_u64()) + } + Some(s) => StdRng::seed_from_u64(s), + } +} diff --git a/src/ext/hashset.rs b/src/ext/hashset.rs index 2872a28..f3d2194 100644 --- a/src/ext/hashset.rs +++ b/src/ext/hashset.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; use std::hash::Hash; +use nohash::{IntSet, IsEnabled}; /// Extends `HashSet` with the `without` method. pub trait HashSetExt { @@ -18,6 +19,23 @@ where } } +/// Extends `HashSet` with the `without` method. +pub trait IntSetExt { + fn without(&self, value: &T) -> IntSet; +} + +impl IntSetExt for IntSet +where + T: Clone + Eq + Hash + IsEnabled, +{ + /** + Returns a copy of the `HashSet` that doesn't contain the specified `value`. + */ + fn without(&self, value: &T) -> IntSet { + self.iter().filter(|v| *v != value).cloned().collect() + } +} + #[cfg(test)] mod tests { diff --git a/src/ext/iterator.rs b/src/ext/iterator.rs index 2025259..ea0054c 100644 --- a/src/ext/iterator.rs +++ b/src/ext/iterator.rs @@ -1,4 +1,6 @@ use itertools::Itertools; +use nohash::{IntSet, IsEnabled}; +use std::hash::Hash; pub struct ChunkByCount { #[allow(clippy::type_complexity)] @@ -20,10 +22,39 @@ where } } +pub struct UniqueByNoHash { + inner: I, + seen: IntSet, + f: F, +} + +impl Iterator for UniqueByNoHash +where + I: Iterator, + V: IsEnabled + Hash + Eq, + F: FnMut(&I::Item) -> V, +{ + type Item = I::Item; + + fn next(&mut self) -> Option { + self.inner.next().filter(|x| { + let key = (self.f)(x); + let r = !self.seen.contains(&key); + self.seen.insert(key); + r + }) + } +} + pub trait IteratorExt: Iterator { fn chunk_by_count(self) -> ChunkByCount where Self: Sized; + fn unique_by_no_hash(self, f: F) -> UniqueByNoHash + where + Self: Sized, + V: IsEnabled + Hash + Eq, + for<'a> F: FnMut(&'a Self::Item) -> V; } impl IteratorExt for I @@ -39,6 +70,18 @@ where inner: self.chunk_by(|i| i.clone()), } } + fn unique_by_no_hash(self, f: F) -> UniqueByNoHash + where + Self: Sized, + V: IsEnabled + Hash + Eq, + F: FnMut(&I::Item) -> V, + { + UniqueByNoHash:: { + inner: self, + seen: IntSet::::default(), + f, + } + } } #[cfg(test)] diff --git a/src/graph/degree.rs b/src/graph/degree.rs index 63bf8af..b670ef4 100644 --- a/src/graph/degree.rs +++ b/src/graph/degree.rs @@ -1,5 +1,5 @@ use super::Graph; -use crate::{Error, ErrorKind}; +use crate::{AdjacentNode, Error, ErrorKind}; use std::collections::HashMap; use std::fmt::Display; use std::hash::Hash; @@ -164,6 +164,19 @@ where } } + pub(crate) fn get_node_degree_by_index(&self, node_index: usize) -> usize { + let adjacent = self.get_adjacent_nodes_by_index(node_index); + adjacent + .iter() + .map(|adj| { + if adj.node_index == node_index { + return 2; + } + 1 + }) + .sum() + } + /** Computes the in-degree of a given node. The node in-degree is the number of edges (u, v) where v is the node. @@ -266,6 +279,19 @@ where } } + pub(crate) fn get_node_weighted_degree_by_index(&self, node_index: usize) -> f64 { + let adjacent = self.get_adjacent_nodes_by_index(node_index); + adjacent + .iter() + .map(|adj| { + if adj.node_index == node_index { + return adj.weight * 2.0; + } + adj.weight + }) + .sum() + } + /** Computes the weighted in-degree of a given node. The weighted in-degree is sum of the weights of edges into to the node. @@ -473,3 +499,41 @@ where .collect() } } + +#[cfg(test)] +mod tests { + + use crate::{Edge, Graph, GraphSpecs}; + + #[test] + fn test_get_node_degree_by_index() { + let edges = vec![Edge::new(0, 1), Edge::new(1, 2), Edge::new(2, 2)]; + let specs = GraphSpecs { + self_loops: true, + ..GraphSpecs::directed_create_missing() + }; + let graph: Graph = + Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + assert_eq!(graph.get_node_degree_by_index(0), 1); + assert_eq!(graph.get_node_degree_by_index(1), 2); + assert_eq!(graph.get_node_degree_by_index(2), 3); + } + + #[test] + fn test_get_weighted_node_degree_by_index() { + let edges = vec![ + Edge::with_weight(0, 1, 0.5), + Edge::with_weight(1, 2, 6.3), + Edge::with_weight(2, 2, 10.0), + ]; + let specs = GraphSpecs { + self_loops: true, + ..GraphSpecs::directed_create_missing() + }; + let graph: Graph = + Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + assert_eq!(graph.get_node_weighted_degree_by_index(0), 0.5); + assert_eq!(graph.get_node_weighted_degree_by_index(1), 6.8); + assert_eq!(graph.get_node_weighted_degree_by_index(2), 26.3); + } +} diff --git a/src/graph/query.rs b/src/graph/query.rs index 083e65c..00ef36c 100644 --- a/src/graph/query.rs +++ b/src/graph/query.rs @@ -1,5 +1,7 @@ use super::Graph; -use crate::{ext::vec::VecExt, AdjacentNode, Edge, Error, ErrorKind, Node}; +use crate::{ + ext::iterator::IteratorExt, ext::vec::VecExt, AdjacentNode, Edge, Error, ErrorKind, Node, +}; use itertools::Itertools; use nohash::IntSet; use std::collections::{HashMap, HashSet}; @@ -919,6 +921,17 @@ where .collect() } + pub(crate) fn get_adjacent_nodes_by_index(&self, node_index: usize) -> Vec<&AdjacentNode> { + match self.specs.directed { + true => self.successors_vec[node_index] + .iter() + .chain(self.predecessors_vec[node_index].iter()) + .unique_by_no_hash(|adj| adj.node_index) // self-loops wind up in successors and predecessors + .collect(), + false => self.successors_vec[node_index].iter().collect(), + } + } + /// Gets a `HashMap` of all the successor edges. pub fn get_successors_map(&self) -> &HashMap> where @@ -1056,3 +1069,31 @@ where } } } + +#[cfg(test)] +mod tests { + + use crate::{Edge, Graph, GraphSpecs}; + + #[test] + fn test_get_adjacent_nodes_by_index() { + let edges = vec![Edge::new(0, 1), Edge::new(1, 2), Edge::new(2, 2)]; + let specs = GraphSpecs { + self_loops: true, + ..GraphSpecs::directed_create_missing() + }; + let graph: Graph = + Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = graph.get_adjacent_nodes_by_index(0); + assert_eq!(result.len(), 1); + assert_eq!(result[0].node_index, 1); + let result = graph.get_adjacent_nodes_by_index(1); + assert_eq!(result.len(), 2); + assert_eq!(result[0].node_index, 2); + assert_eq!(result[1].node_index, 0); + let result = graph.get_adjacent_nodes_by_index(2); + assert_eq!(result.len(), 2); + assert_eq!(result[0].node_index, 2); + assert_eq!(result[1].node_index, 1); + } +} diff --git a/src/lib.rs b/src/lib.rs index 715dfd3..422bd91 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,6 +186,7 @@ mod error; pub use error::{Error, ErrorKind}; mod ext; +pub use ext::iterator::IteratorExt; mod graph; pub use graph::Graph; From a80e917c891785cfceb3bdede9995c579854d9e9 Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Fri, 3 Jan 2025 16:38:12 -0500 Subject: [PATCH 2/9] wip --- src/algorithms/community/leiden.rs | 313 +++++++++++++++++++++++++++-- src/lib.rs | 1 - 2 files changed, 295 insertions(+), 19 deletions(-) diff --git a/src/algorithms/community/leiden.rs b/src/algorithms/community/leiden.rs index 5a82bc5..f95780f 100644 --- a/src/algorithms/community/leiden.rs +++ b/src/algorithms/community/leiden.rs @@ -3,10 +3,13 @@ use crate::{ AdjacentNode, Edge, EdgeDedupeStrategy, Error, ErrorKind, Graph, GraphSpecs, Node, }; use nohash::IntSet; +use serde::de; use std::collections::{HashSet, VecDeque}; use std::fmt::Display; use std::hash::Hash; +use super::partitions::modularity_by_indexes; + struct Partition { pub node_partition: Vec, pub partition: Vec>, @@ -20,6 +23,58 @@ impl Partition { pub fn degree_sum(&self, node: usize) -> f64 { self.degree_sums[self.node_partition[node]] } + pub fn move_node( + &mut self, + v: usize, + target: IntSet, + graph: &Graph, + weighted: bool, + ) where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, + { + let source_partition_idx = self.node_partition[v]; + let target_partition_idx: usize; + if target.len() > 0 { + let el = target.iter().next().unwrap(); + target_partition_idx = self.node_partition[*el]; + } else { + target_partition_idx = self.partition.len(); + self.degree_sums.push(0.0); + } + + // Remove `v` from its old community and place it into the target partition + self.partition[source_partition_idx].remove(&v); + self.partition[target_partition_idx].insert(v); + + // Also update the sum of node degrees in that partition + let deg_v = match weighted { + true => graph.get_node_weighted_degree_by_index(v), + false => graph.get_node_degree_by_index(v) as f64, + }; + self.degree_sums[source_partition_idx] -= deg_v; + self.degree_sums[target_partition_idx] += deg_v; + + // Update v's entry in the index lookup table + self.node_partition[v] = target_partition_idx; + + // If the original partition is empty now, that we removed v from it, remove it and adjust the indexes in _node_part + if self.partition[source_partition_idx].len() == 0 { + self.partition.remove(source_partition_idx); + self.degree_sums.remove(source_partition_idx); + self.node_partition = self + .node_partition + .iter() + .map(|i| { + if *i < source_partition_idx { + *i + } else { + *i - 1 + } + }) + .collect(); + } + } } pub fn leiden( @@ -66,16 +121,22 @@ where let mut shuffled_indexes: VecDeque = utility::get_shuffled_node_indexes(graph, None).into(); while let Some(v) = shuffled_indexes.pop_front() { - let adjacent_communities = get_adjacent_communities(v, graph, &partition); + let adjacent_community_indexes = get_adjacent_communities(v, graph, &partition); + let mut adjacent_communities: Vec<&IntSet> = adjacent_community_indexes + .into_iter() + .map(|x| &partition.partition[x]) + .collect(); + let empty = IntSet::default(); + adjacent_communities.push(&empty); let (max_community, max_delta) = argmax( v, - &adjacent_communities, partition, + &adjacent_communities, graph, weighted, resolution, ); - // argmax + if max_delta > 0.0 {} } Partition { partition: partition.partition.clone(), @@ -84,8 +145,6 @@ where } } -// fn argmax() - fn get_singleton_partition(graph: &Graph, weighted: bool) -> Partition where T: Hash + Eq + Clone + Ord + Display + Send + Sync, @@ -126,29 +185,31 @@ where adjacent_communities } -fn argmax( - node: usize, - communities: &IntSet, +fn argmax<'a, T, A>( + v: usize, partition: &Partition, + communities: &'a [&IntSet], graph: &Graph, weighted: bool, resolution: f64, -) -> (usize, f64) +) -> (&'a IntSet, f64) where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { - let mut max_community = partition.node_partition[node]; - let mut max_delta = 0.0; - for community_index in communities.into_iter() { - let community = &partition.partition[*community_index]; - let delta = get_delta(node, partition, community, graph, weighted, resolution); - if delta > max_delta { - max_delta = delta; - max_community = *community_index; + let mut idx = 0; + let mut opt = communities[idx]; + let mut val = get_delta(v, partition, opt, graph, weighted, resolution); + for k in 1..communities.len() { + let optk = &communities[k]; + let valk = get_delta(v, partition, optk, graph, weighted, resolution); + if valk > val { + idx = k; + opt = optk; + val = valk; } } - (max_community, max_delta) + (opt, val) } fn get_delta( @@ -206,3 +267,219 @@ where }) .sum() } + +#[cfg(test)] +mod tests { + + use super::*; + use crate::{Edge, Graph, GraphSpecs, Node}; + use assert_approx_eq::assert_approx_eq; + use std::sync::Arc; + + #[test] + fn test_single_node_neighbor_cut_size_1() { + let edges: Vec>> = vec![ + Edge::new(0, 1), + Edge::new(1, 2), + Edge::new(1, 3), + Edge::new(1, 4), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let community = vec![1, 2, 3].into_iter().collect(); + let result = single_node_neighbor_cut_size(&graph, 0, &community, false); + assert_eq!(result, 1.0); + let result = single_node_neighbor_cut_size(&graph, 1, &community, false); + assert_eq!(result, 2.0); + let result = single_node_neighbor_cut_size(&graph, 2, &community, false); + assert_eq!(result, 0.0); + } + + #[test] + fn test_single_node_neighbor_cut_size_2() { + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(1, 3, 3.5), + Edge::with_weight(1, 4, 4.7), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let community = vec![1, 2, 3].into_iter().collect(); + let result = single_node_neighbor_cut_size(&graph, 0, &community, true); + assert_eq!(result, 1.1); + let result = single_node_neighbor_cut_size(&graph, 1, &community, true); + assert_eq!(result, 5.8); + let result = single_node_neighbor_cut_size(&graph, 2, &community, true); + assert_eq!(result, 0.0); + } + + #[test] + fn test_get_delta_1() { + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(1, 3, 3.5), + Edge::with_weight(1, 4, 4.7), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let partition = Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 1, 1], + degree_sums: vec![12.0, 24.0], + }; + let target = vec![2, 3, 4].into_iter().collect(); + let result = get_delta(1, &partition, &target, &graph, true, 1.0); + assert_approx_eq!(result, -0.11206896551724145); + } + + #[test] + fn test_get_delta_2() { + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(1, 3, 3.5), + Edge::with_weight(1, 4, 4.7), + ]; + let specs = GraphSpecs::undirected_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let partition = Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 1, 1], + degree_sums: vec![12.0, 24.0], + }; + let target = vec![2, 3, 4].into_iter().collect(); + let result = get_delta(1, &partition, &target, &graph, true, 1.0); + assert_approx_eq!(result, -0.20689655172413812); + } + + #[test] + fn test_get_adjacent_communities() { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + ]; + let edges: Vec>> = vec![ + Edge::new(0, 2), + Edge::new(1, 2), + Edge::new(2, 3), + Edge::new(2, 4), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap(); + let partition = Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 2, 3], + degree_sums: vec![0.0, 0.0, 0.0, 0.0], + }; + let result = get_adjacent_communities(0, &graph, &partition); + assert_eq!(result.len(), 2); + assert!(result.contains(&0)); + assert!(result.contains(&1)); + let result = get_adjacent_communities(1, &graph, &partition); + assert_eq!(result.len(), 2); + assert!(result.contains(&0)); + assert!(result.contains(&1)); + let result = get_adjacent_communities(2, &graph, &partition); + assert_eq!(result.len(), 3); + assert!(result.contains(&1)); + assert!(result.contains(&2)); + assert!(result.contains(&3)); + } + + #[test] + fn test_argmax_1() { + let graph = get_graph_for_argmax(true); + let partition = get_partition_for_argmax(); + let communities = get_communities_for_argmax(&partition, &graph); + let result = argmax(0, &partition, &communities, &graph, true, 1.0); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.09033145065398336); + let result = argmax(0, &partition, &communities, &graph, false, 1.0); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.21875); + } + + #[test] + fn test_argmax_2() { + let graph = get_graph_for_argmax(false); + let partition = get_partition_for_argmax(); + let communities = get_communities_for_argmax(&partition, &graph); + let result = argmax(0, &partition, &communities, &graph, true, 1.0); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.09033145065398336); + let result = argmax(0, &partition, &communities, &graph, false, 1.0); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.21875); + } + + #[test] + fn test_move_node() { + // TODO + } + + fn get_graph_for_argmax(directed: bool) -> Graph { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + ]; + let edges: Vec>> = vec![ + Edge::with_weight(0, 2, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(2, 3, 3.5), + Edge::with_weight(2, 4, 4.7), + ]; + let specs = if directed { + GraphSpecs::directed_create_missing() + } else { + GraphSpecs::undirected_create_missing() + }; + Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap() + } + + fn get_partition_for_argmax() -> Partition { + Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 2, 3], + degree_sums: vec![0.0, 0.0, 0.0, 0.0], + } + } + + fn get_communities_for_argmax<'a>( + partition: &'a Partition, + graph: &Graph, + ) -> Vec<&'a IntSet> { + let community_indexes = get_adjacent_communities(0, &graph, &partition); + community_indexes + .into_iter() + .map(|x| &partition.partition[x]) + .collect() + } +} diff --git a/src/lib.rs b/src/lib.rs index 422bd91..715dfd3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,7 +186,6 @@ mod error; pub use error::{Error, ErrorKind}; mod ext; -pub use ext::iterator::IteratorExt; mod graph; pub use graph::Graph; From 24f3a68ca007d6b93ad9c2f67cb971478c2b1a67 Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Mon, 6 Jan 2025 17:09:11 -0500 Subject: [PATCH 3/9] wip --- Cargo.toml | 1 + src/algorithms/boundary/mod.rs | 176 +++++++++++ .../community/leiden/aggregate_graph.rs | 60 ++++ .../community/{leiden.rs => leiden/mod.rs} | 275 ++++++++++-------- src/algorithms/community/leiden/partition.rs | 74 +++++ src/algorithms/cuts/mod.rs | 152 ++++++++++ src/algorithms/mod.rs | 6 + src/graph/query.rs | 20 ++ 8 files changed, 644 insertions(+), 120 deletions(-) create mode 100644 src/algorithms/boundary/mod.rs create mode 100644 src/algorithms/community/leiden/aggregate_graph.rs rename src/algorithms/community/{leiden.rs => leiden/mod.rs} (67%) create mode 100644 src/algorithms/community/leiden/partition.rs create mode 100644 src/algorithms/cuts/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 59a1f1f..a7b8dbb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ build = "build.rs" [dependencies] assert_approx_eq = "1.1.0" +assert_unordered = "0.3.5" doc-comment = "0.3.3" itertools = "0.13.0" lapack-sys = { version = "0.14.0", optional = true } diff --git a/src/algorithms/boundary/mod.rs b/src/algorithms/boundary/mod.rs new file mode 100644 index 0000000..99e2af9 --- /dev/null +++ b/src/algorithms/boundary/mod.rs @@ -0,0 +1,176 @@ +use crate::{Edge, Error, ErrorKind, Graph}; +use nohash::IntSet; +use std::collections::HashSet; +use std::fmt::Debug; +use std::fmt::Display; +use std::hash::Hash; +use std::sync::Arc; + +/** +Returns the edge boundary for a bunch of nodes. + +The *edge boundary* of a set *S* with respect to a set *T* is the +set of edges (*u*, *v*) such that *u* is in *S* and *v* is in *T*. +If *T* is not specified, it is assumed to be the set of all nodes +not in *S*. + +# Arguments + +* `graph`: the `Graph` the nodes are in +* `nbunch1`: the first set of nodes +* `nbunch2`: the second set of nodes + +# Examples + +``` +use graphrs::{algorithms::boundary::edge_boundary, generators, Graph}; + +let graph = generators::social::karate_club_graph(); +let edges = edge_boundary(&graph, &[0, 1, 2, 3], Some(&[4, 5, 6, 7])).unwrap(); +assert_eq!(edges.len(), 7); +``` + +*/ +pub fn edge_boundary<'a, T, A>( + graph: &'a Graph, + nbunch1: &[T], + nbunch2: Option<&[T]>, +) -> Result>>, Error> +where + T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, + A: Clone + Send + Sync, +{ + if !graph.has_nodes(nbunch1) { + return Err(Error { + kind: ErrorKind::NodeNotFound, + message: "One or more of `nbunch1` were not found in the graph.".to_string(), + }); + } + if nbunch2.is_some() && !graph.has_nodes(nbunch2.unwrap()) { + return Err(Error { + kind: ErrorKind::NodeNotFound, + message: "One or more of `nbunch2` were not found in the graph.".to_string(), + }); + } + + let out_edges = match graph.specs.directed { + true => graph.get_out_edges_for_nodes(nbunch1), + false => graph.get_edges_for_nodes(nbunch1), + } + .unwrap(); + let nset1 = nbunch1.iter().cloned().collect::>(); + let nset2 = match nbunch2 { + Some(nbunch2) => nbunch2.iter().cloned().collect::>(), + None => graph + .get_all_node_names() + .into_iter() + .filter(|n| !nset1.contains(n)) + .cloned() + .collect::>(), + }; + return Ok(out_edges + .into_iter() + .filter(|e| { + (nset1.contains(&e.u) && nset2.contains(&e.v)) + || (nset2.contains(&e.u) && nset1.contains(&e.v)) + }) + .collect()); +} + +pub(crate) fn edge_boundary_by_indexes<'a, T, A>( + graph: &'a Graph, + nbunch1: &[usize], + nbunch2: &[usize], +) -> Vec<(usize, usize, f64)> +where + T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let out_edges = graph.get_out_edges_for_node_indexes(nbunch1); + let nset1 = nbunch1.iter().cloned().collect::>(); + let nset2 = nbunch2.iter().cloned().collect::>(); + out_edges + .into_iter() + .filter(|(u, v, _weight)| { + (nset1.contains(&u) && nset2.contains(&v)) || (nset2.contains(&u) && nset1.contains(&v)) + }) + .collect() +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::{generators, GraphSpecs}; + use assert_unordered::assert_eq_unordered; + + #[test] + fn test_edge_boundary_1() { + let edges = vec![ + Edge::new("n1", "n2"), + Edge::new("n1", "n3"), + Edge::new("n2", "n1"), + Edge::new("n2", "n3"), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = edge_boundary(&graph, &["n1"], None).unwrap(); + assert_eq!(result.len(), 2); + let essence = result.iter().map(|e| (e.u, e.v)).collect::>(); + assert_eq_unordered!(essence, vec![("n1", "n2"), ("n1", "n3")]); + } + + #[test] + fn test_edge_boundary_2() { + let edges = vec![ + Edge::new("n1", "n2"), + Edge::new("n1", "n3"), + Edge::new("n2", "n1"), + Edge::new("n2", "n3"), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = edge_boundary(&graph, &["n1"], Some(&["n2", "n3"])).unwrap(); + let essence = result.iter().map(|e| (e.u, e.v)).collect::>(); + assert_eq_unordered!(essence, vec![("n1", "n2"), ("n1", "n3")]); + } + + #[test] + fn test_edge_boundary_3() { + let edges = vec![ + Edge::new("n1", "n3"), + Edge::new("n2", "n1"), + Edge::new("n2", "n3"), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = edge_boundary(&graph, &["n1"], Some(&["n2", "n3"])).unwrap(); + let essence = result.iter().map(|e| (e.u, e.v)).collect::>(); + assert_eq_unordered!(essence, vec![("n1", "n3")]); + } + + #[test] + fn test_edge_boundary_4() { + let edges = vec![ + Edge::new("n1", "n3"), + Edge::new("n2", "n1"), + Edge::new("n2", "n3"), + ]; + let specs = GraphSpecs::undirected_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = edge_boundary(&graph, &["n1"], Some(&["n2", "n3"])).unwrap(); + let essence = result.iter().map(|e| (e.u, e.v)).collect::>(); + assert_eq_unordered!(essence, vec![("n1", "n2"), ("n1", "n3")]); + } + + #[test] + fn test_edge_boundary_5() { + let graph = generators::social::karate_club_graph(); + let result = edge_boundary(&graph, &[0, 1, 2, 3], Some(&[4, 5, 6, 7])).unwrap(); + let essence = result.iter().map(|e| (e.u, e.v)).collect::>(); + assert_eq_unordered!( + essence, + vec![(0, 4), (0, 5), (0, 6), (0, 7), (1, 7), (2, 7), (3, 7)] + ); + } +} diff --git a/src/algorithms/community/leiden/aggregate_graph.rs b/src/algorithms/community/leiden/aggregate_graph.rs new file mode 100644 index 0000000..8c7336f --- /dev/null +++ b/src/algorithms/community/leiden/aggregate_graph.rs @@ -0,0 +1,60 @@ +use super::Partition; +use crate::{Edge, Graph, Node}; +use nohash::IntSet; +use std::fmt::Display; +use std::hash::Hash; +use std::sync::Arc; + +pub(crate) struct AggregateGraph<'a, T, A> +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync + PartialOrd, + A: Clone + Send + Sync, +{ + pub graph: Graph, + pub node_nodes: Option>>, + pub node_weights: Option>, + pub parent_graph: Option<&'a Graph>, + pub parent_partition: Option<&'a Partition>, +} + +impl<'a, T, A> AggregateGraph<'a, T, A> +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync + PartialOrd, + A: Clone + Send + Sync, +{ + pub fn initial(graph: &Graph, weighted: bool) -> Self { + let nodes: Vec>> = graph + .get_all_node_names() + .into_iter() + .map(|name| Node::from_name(name.clone())) + .collect(); + let edges: Vec>> = graph + .get_all_edges() + .into_iter() + .map(|edge| match weighted { + true => edge.clone(), + false => Edge::with_weight(edge.u.clone(), edge.v.clone(), 1.0), + }) + .collect(); + let weighted_graph = + Graph::::new_from_nodes_and_edges(nodes, edges, graph.specs.clone()).unwrap(); + + AggregateGraph { + graph: weighted_graph, + node_nodes: None, + node_weights: None, + parent_graph: None, + parent_partition: None, + } + } + + pub fn node_total(&self, community: &IntSet) -> f64 { + if self.node_weights.is_none() { + return community.len() as f64; + } + community + .iter() + .map(|node| self.node_weights.as_ref().unwrap()[*node]) + .sum() + } +} diff --git a/src/algorithms/community/leiden.rs b/src/algorithms/community/leiden/mod.rs similarity index 67% rename from src/algorithms/community/leiden.rs rename to src/algorithms/community/leiden/mod.rs index f95780f..2eb3630 100644 --- a/src/algorithms/community/leiden.rs +++ b/src/algorithms/community/leiden/mod.rs @@ -1,97 +1,42 @@ use crate::{ - algorithms::community::partitions, algorithms::community::utility, ext::hashset::IntSetExt, - AdjacentNode, Edge, EdgeDedupeStrategy, Error, ErrorKind, Graph, GraphSpecs, Node, + algorithms::community::partitions, algorithms::community::utility, + algorithms::cuts::cut_size_by_indexes, ext::hashset::IntSetExt, Error, Graph, }; use nohash::IntSet; -use serde::de; use std::collections::{HashSet, VecDeque}; +use std::fmt::Debug; use std::fmt::Display; use std::hash::Hash; -use super::partitions::modularity_by_indexes; +mod partition; +use partition::Partition; -struct Partition { - pub node_partition: Vec, - pub partition: Vec>, - pub degree_sums: Vec, -} - -impl Partition { - pub fn node_community(&self, node: usize) -> &IntSet { - &self.partition[self.node_partition[node]] - } - pub fn degree_sum(&self, node: usize) -> f64 { - self.degree_sums[self.node_partition[node]] - } - pub fn move_node( - &mut self, - v: usize, - target: IntSet, - graph: &Graph, - weighted: bool, - ) where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, - { - let source_partition_idx = self.node_partition[v]; - let target_partition_idx: usize; - if target.len() > 0 { - let el = target.iter().next().unwrap(); - target_partition_idx = self.node_partition[*el]; - } else { - target_partition_idx = self.partition.len(); - self.degree_sums.push(0.0); - } - - // Remove `v` from its old community and place it into the target partition - self.partition[source_partition_idx].remove(&v); - self.partition[target_partition_idx].insert(v); - - // Also update the sum of node degrees in that partition - let deg_v = match weighted { - true => graph.get_node_weighted_degree_by_index(v), - false => graph.get_node_degree_by_index(v) as f64, - }; - self.degree_sums[source_partition_idx] -= deg_v; - self.degree_sums[target_partition_idx] += deg_v; - - // Update v's entry in the index lookup table - self.node_partition[v] = target_partition_idx; - - // If the original partition is empty now, that we removed v from it, remove it and adjust the indexes in _node_part - if self.partition[source_partition_idx].len() == 0 { - self.partition.remove(source_partition_idx); - self.degree_sums.remove(source_partition_idx); - self.node_partition = self - .node_partition - .iter() - .map(|i| { - if *i < source_partition_idx { - *i - } else { - *i - 1 - } - }) - .collect(); - } - } -} +mod aggregate_graph; +use aggregate_graph::AggregateGraph; pub fn leiden( graph: &Graph, weighted: bool, resolution: Option, + omega: Option, ) -> Result>, Error> where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { let _resolution = resolution.unwrap_or(0.05); - let partition = get_singleton_partition(graph, weighted); + let _omega = omega.unwrap_or(0.3); + let aggregate_graph = AggregateGraph::initial(graph, weighted); + let mut partition = get_singleton_partition(graph, weighted); // Ok(partitions::convert_usize_partitions_to_t(partition, &graph)) let mut prev_partition: Option = None; loop { - let new_partition = move_nodes_fast(graph, &partition, weighted, _resolution); + let new_partition = move_nodes_fast( + &aggregate_graph.graph, + &mut partition, + weighted, + _resolution, + ); if partitions::partition_is_singleton(&new_partition.partition, graph.number_of_nodes()) || (prev_partition.is_some() && partitions::partitions_eq( @@ -104,13 +49,15 @@ where &graph, )); } - prev_partition = Some(new_partition); + prev_partition = Some(new_partition.clone()); + let refined_partition = + refine_partition(&aggregate_graph, &new_partition, _resolution, _omega); } } fn move_nodes_fast( graph: &Graph, - partition: &Partition, + partition: &mut Partition, weighted: bool, resolution: f64, ) -> Partition @@ -118,16 +65,10 @@ where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { - let mut shuffled_indexes: VecDeque = - utility::get_shuffled_node_indexes(graph, None).into(); - while let Some(v) = shuffled_indexes.pop_front() { - let adjacent_community_indexes = get_adjacent_communities(v, graph, &partition); - let mut adjacent_communities: Vec<&IntSet> = adjacent_community_indexes - .into_iter() - .map(|x| &partition.partition[x]) - .collect(); + let mut queue: VecDeque = utility::get_shuffled_node_indexes(graph, None).into(); + while let Some(v) = queue.pop_front() { let empty = IntSet::default(); - adjacent_communities.push(&empty); + let adjacent_communities = get_adjacent_communities(v, graph, partition, &empty); let (max_community, max_delta) = argmax( v, partition, @@ -136,12 +77,84 @@ where weighted, resolution, ); - if max_delta > 0.0 {} + if max_delta > 0.0 { + partition.move_node(v, &max_community, graph, weighted); + let queue_set: IntSet = queue.iter().cloned().collect(); + for u in graph.get_successor_nodes_by_index(&v) { + if !max_community.contains(&u.node_index) && !queue_set.contains(&u.node_index) { + queue.push_back(u.node_index); + } + } + } } - Partition { - partition: partition.partition.clone(), - node_partition: partition.node_partition.clone(), - degree_sums: partition.degree_sums.clone(), + partition.clone() +} + +fn refine_partition( + aggregate_graph: &AggregateGraph, + partition: &Partition, + resolution: f64, + omega: f64, +) -> Partition +where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut refined_partition = get_singleton_partition(&aggregate_graph.graph, true); + for community in partition.partition.iter() { + // merge_nodes_subset( + // &refined_partition, + // &community, + // graph, + // weighted, + // resolution, + // omega, + // ); + } + refined_partition +} + +fn merge_nodes_subset( + partition: &mut Partition, + community: &IntSet, + aggregate_graph: &AggregateGraph, + resolution: f64, + omega: f64, +) where + T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let size_s = aggregate_graph.node_total(community); + let R: IntSet = community + .iter() + .map(|v| v.clone()) + .filter(|v| { + let community_without_v: Vec = community.without(v).iter().cloned().collect(); + let x = cut_size_by_indexes(&aggregate_graph.graph, &[*v], &community_without_v, true); + let v_set = vec![*v].into_iter().collect(); + let v_node_total = aggregate_graph.node_total(&v_set); + x >= resolution * v_node_total * (size_s - v_node_total) + }) + .collect(); + for v in R { + if partition.node_community(v).len() != 1 { + continue; + } + let T = partition + .partition + .into_iter() + .filter(|C| { + let nbunch1: Vec = C.iter().map(|n| n.clone()).collect(); + let nbunch2: Vec = (community - C).iter().map(|n| n.clone()).collect(); + let cs = cut_size_by_indexes( + &aggregate_graph.graph, + nbunch1.as_slice(), + nbunch2.as_slice(), + true, + ); + if C.is_subset(community) {} + }) + .collect(); } } @@ -168,44 +181,46 @@ where } } -fn get_adjacent_communities( +fn get_adjacent_communities<'a, T, A>( node: usize, graph: &Graph, - partition: &Partition, -) -> IntSet + partition: &'a Partition, + empty: &'a IntSet, +) -> Vec<&'a IntSet> where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { - let mut adjacent_communities = IntSet::default(); - adjacent_communities.insert(partition.node_partition[node]); + let mut adjacent_communities: Vec<&IntSet> = vec![]; + adjacent_communities.push(&partition.partition[partition.node_partition[node]]); for u in graph.get_successor_nodes_by_index(&node) { - adjacent_communities.insert(partition.node_partition[u.node_index]); + adjacent_communities.push(&partition.partition[partition.node_partition[u.node_index]]); } + adjacent_communities.push(&empty); adjacent_communities } -fn argmax<'a, T, A>( +fn argmax( v: usize, partition: &Partition, - communities: &'a [&IntSet], + communities: &[&IntSet], graph: &Graph, weighted: bool, resolution: f64, -) -> (&'a IntSet, f64) +) -> (IntSet, f64) where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { let mut idx = 0; - let mut opt = communities[idx]; - let mut val = get_delta(v, partition, opt, graph, weighted, resolution); + let mut opt: IntSet = communities[idx].iter().cloned().collect(); + let mut val = get_delta(v, partition, &opt, graph, weighted, resolution); for k in 1..communities.len() { let optk = &communities[k]; let valk = get_delta(v, partition, optk, graph, weighted, resolution); if valk > val { idx = k; - opt = optk; + opt = optk.iter().cloned().collect(); val = valk; } } @@ -247,6 +262,8 @@ where / m } +// fn aggregate_graph(graph: &Graph) + fn single_node_neighbor_cut_size( graph: &Graph, v: usize, @@ -274,6 +291,7 @@ mod tests { use super::*; use crate::{Edge, Graph, GraphSpecs, Node}; use assert_approx_eq::assert_approx_eq; + use sprs::vec; use std::sync::Arc; #[test] @@ -387,26 +405,30 @@ mod tests { node_partition: vec![0, 0, 1, 2, 3], degree_sums: vec![0.0, 0.0, 0.0, 0.0], }; - let result = get_adjacent_communities(0, &graph, &partition); - assert_eq!(result.len(), 2); - assert!(result.contains(&0)); - assert!(result.contains(&1)); - let result = get_adjacent_communities(1, &graph, &partition); - assert_eq!(result.len(), 2); - assert!(result.contains(&0)); - assert!(result.contains(&1)); - let result = get_adjacent_communities(2, &graph, &partition); + let empty = IntSet::default(); + let result = get_adjacent_communities(0, &graph, &partition, &empty); assert_eq!(result.len(), 3); - assert!(result.contains(&1)); - assert!(result.contains(&2)); - assert!(result.contains(&3)); + assert!(result == vec![&partition.partition[0], &partition.partition[1], &empty]); + let result = get_adjacent_communities(1, &graph, &partition, &empty); + assert!(result == vec![&partition.partition[0], &partition.partition[1], &empty]); + let result = get_adjacent_communities(2, &graph, &partition, &empty); + assert!( + result + == vec![ + &partition.partition[1], + &partition.partition[2], + &partition.partition[3], + &empty + ] + ); } #[test] fn test_argmax_1() { let graph = get_graph_for_argmax(true); let partition = get_partition_for_argmax(); - let communities = get_communities_for_argmax(&partition, &graph); + let empty = IntSet::default(); + let communities = get_communities_for_argmax(&partition, &graph, &empty); let result = argmax(0, &partition, &communities, &graph, true, 1.0); assert_eq!(result.0.len(), 1); assert!(result.0.contains(&2)); @@ -421,7 +443,8 @@ mod tests { fn test_argmax_2() { let graph = get_graph_for_argmax(false); let partition = get_partition_for_argmax(); - let communities = get_communities_for_argmax(&partition, &graph); + let empty = IntSet::default(); + let communities = get_communities_for_argmax(&partition, &graph, &empty); let result = argmax(0, &partition, &communities, &graph, true, 1.0); assert_eq!(result.0.len(), 1); assert!(result.0.contains(&2)); @@ -434,7 +457,22 @@ mod tests { #[test] fn test_move_node() { - // TODO + let graph = get_graph_for_argmax(true); + let mut partition = get_partition_for_argmax(); + let mut target = IntSet::default(); + target.insert(2); + partition.move_node(0, &target, &graph, true); + assert_eq!(partition.partition.len(), 4); + assert!(partition.partition[0] == vec![1].into_iter().collect()); + assert!(partition.partition[1] == vec![0, 2].into_iter().collect()); + assert!(partition.partition[2] == vec![3].into_iter().collect()); + assert!(partition.partition[3] == vec![4].into_iter().collect()); + assert_eq!(partition.node_partition[0], 1); + assert_eq!(partition.node_partition[1], 0); + assert_eq!(partition.node_partition[2], 1); + assert_eq!(partition.node_partition[3], 2); + assert_eq!(partition.node_partition[4], 3); + assert!(partition.degree_sums == vec![-1.1, 1.1, 0.0, 0.0]); } fn get_graph_for_argmax(directed: bool) -> Graph { @@ -475,11 +513,8 @@ mod tests { fn get_communities_for_argmax<'a>( partition: &'a Partition, graph: &Graph, + empty: &'a IntSet, ) -> Vec<&'a IntSet> { - let community_indexes = get_adjacent_communities(0, &graph, &partition); - community_indexes - .into_iter() - .map(|x| &partition.partition[x]) - .collect() + get_adjacent_communities(0, &graph, &partition, empty) } } diff --git a/src/algorithms/community/leiden/partition.rs b/src/algorithms/community/leiden/partition.rs new file mode 100644 index 0000000..c0ac9f8 --- /dev/null +++ b/src/algorithms/community/leiden/partition.rs @@ -0,0 +1,74 @@ +use crate::Graph; +use nohash::IntSet; +use std::fmt::Display; +use std::hash::Hash; + +#[derive(Debug, Clone)] +pub(crate) struct Partition { + pub node_partition: Vec, + pub partition: Vec>, + pub degree_sums: Vec, +} + +impl Partition { + pub fn node_community(&self, node: usize) -> &IntSet { + &self.partition[self.node_partition[node]] + } + + pub fn degree_sum(&self, node: usize) -> f64 { + self.degree_sums[self.node_partition[node]] + } + + pub fn move_node( + &mut self, + v: usize, + target: &IntSet, + graph: &Graph, + weighted: bool, + ) where + T: Hash + Eq + Clone + Ord + Display + Send + Sync, + A: Clone + Send + Sync, + { + let source_partition_idx = self.node_partition[v]; + let target_partition_idx: usize; + if target.len() > 0 { + let el = target.iter().next().unwrap(); + target_partition_idx = self.node_partition[*el]; + } else { + target_partition_idx = self.partition.len(); + self.degree_sums.push(0.0); + } + + // Remove `v` from its old community and place it into the target partition + self.partition[source_partition_idx].remove(&v); + self.partition[target_partition_idx].insert(v); + + // Also update the sum of node degrees in that partition + let deg_v = match weighted { + true => graph.get_node_weighted_degree_by_index(v), + false => graph.get_node_degree_by_index(v) as f64, + }; + self.degree_sums[source_partition_idx] -= deg_v; + self.degree_sums[target_partition_idx] += deg_v; + + // Update v's entry in the index lookup table + self.node_partition[v] = target_partition_idx; + + // If the original partition is empty now, that we removed v from it, remove it and adjust the indexes in _node_part + if self.partition[source_partition_idx].len() == 0 { + self.partition.remove(source_partition_idx); + self.degree_sums.remove(source_partition_idx); + self.node_partition = self + .node_partition + .iter() + .map(|i| { + if *i < source_partition_idx { + *i + } else { + *i - 1 + } + }) + .collect(); + } + } +} diff --git a/src/algorithms/cuts/mod.rs b/src/algorithms/cuts/mod.rs new file mode 100644 index 0000000..6235d8d --- /dev/null +++ b/src/algorithms/cuts/mod.rs @@ -0,0 +1,152 @@ +use crate::{ + algorithms::boundary::{edge_boundary, edge_boundary_by_indexes}, + Error, Graph, +}; +use std::fmt::Debug; +use std::fmt::Display; +use std::hash::Hash; + +/** +Returns the size of the cut between two bunches of nodes. + +A *cut* is a partition of the nodes of a graph into two sets. The +*cut size* is the sum of the weights of the edges "between" the two +sets of nodes. + +# Arguments + +* `graph`: the `Graph` the nodes are in +* `nbunch1`: the first set of nodes +* `nbunch2`: the second set of nodes +* `weighted`: whether to consider edge weights + +``` +use graphrs::{algorithms::cuts::cut_size, generators, Graph}; + +let graph = generators::social::karate_club_graph(); +let size = cut_size(&graph, &[0, 1, 2, 3], &[4, 5, 6, 7], true).unwrap(); +assert_eq!(size, 22.0); +``` + +*/ +pub fn cut_size( + graph: &Graph, + nbunch1: &[T], + nbunch2: &[T], + weighted: bool, +) -> Result +where + T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut edges = edge_boundary(graph, nbunch1, Some(nbunch2))?; + edges = match graph.specs.directed { + false => edges, + true => { + edges.extend(edge_boundary(graph, nbunch2, Some(nbunch1))?); + edges + } + }; + Ok(edges + .into_iter() + .map(|e| match weighted { + true => e.weight, + false => 1.0, + }) + .sum()) +} + +pub(crate) fn cut_size_by_indexes( + graph: &Graph, + nbunch1: &[usize], + nbunch2: &[usize], + weighted: bool, +) -> f64 +where + T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, + A: Clone + Send + Sync, +{ + let mut edges = edge_boundary_by_indexes(graph, nbunch1, nbunch2); + edges = match graph.specs.directed { + false => edges, + true => { + edges.extend(edge_boundary_by_indexes(graph, nbunch2, nbunch1)); + edges + } + }; + edges + .into_iter() + .map(|e| match weighted { + true => e.2, + false => 1.0, + }) + .sum() +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::{generators, Edge, GraphSpecs}; + + #[test] + fn test_cut_size_1() { + let edges = vec![ + Edge::new("n1", "n2"), + Edge::new("n1", "n3"), + Edge::new("n2", "n1"), + Edge::new("n2", "n3"), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = cut_size(&graph, &["n1"], &["n2"], false).unwrap(); + assert_eq!(result, 2.0); + } + + #[test] + fn test_cut_size_2() { + let edges = vec![ + Edge::with_weight("n1", "n2", 1.1), + Edge::with_weight("n1", "n3", 2.3), + Edge::with_weight("n2", "n1", 3.5), + Edge::with_weight("n2", "n3", 4.7), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = cut_size(&graph, &["n1"], &["n2"], true).unwrap(); + assert_eq!(result, 4.6); + } + + #[test] + fn test_cut_size_3() { + let edges = vec![ + Edge::new("n1", "n3"), + Edge::new("n2", "n1"), + Edge::new("n2", "n3"), + ]; + let specs = GraphSpecs::undirected_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = cut_size(&graph, &["n1"], &["n2"], false).unwrap(); + assert_eq!(result, 1.0); + } + + #[test] + fn test_cut_size_4() { + let edges = vec![ + Edge::with_weight("n1", "n3", 2.3), + Edge::with_weight("n2", "n1", 3.5), + Edge::with_weight("n2", "n3", 4.7), + ]; + let specs = GraphSpecs::undirected_create_missing(); + let graph: Graph<&str, ()> = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let result = cut_size(&graph, &["n1"], &["n2"], true).unwrap(); + assert_eq!(result, 3.5); + } + + #[test] + fn test_cut_size_5() { + let graph = generators::social::karate_club_graph(); + let result = cut_size(&graph, &[0, 1, 2, 3], &[4, 5, 6, 7], true).unwrap(); + assert_eq!(result, 22.0); + } +} diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index a18fe11..9a1b77d 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -1,3 +1,6 @@ +/// Functions to find the boundary of a set of nodes. +pub mod boundary; + /// Compute the centrality of nodes and edges in the graph. pub mod centrality; @@ -10,6 +13,9 @@ pub mod community; /// Find components of a graph. pub mod components; +/// Functions for finding and evaluating cuts in a graph. +pub mod cuts; + /// Compute resiliency measures of a graph. pub mod resiliency; diff --git a/src/graph/query.rs b/src/graph/query.rs index 00ef36c..4820bdc 100644 --- a/src/graph/query.rs +++ b/src/graph/query.rs @@ -605,6 +605,26 @@ where .collect()) } + pub(crate) fn get_out_edges_for_node_indexes( + &self, + node_indexes: &[usize], + ) -> Vec<(usize, usize, f64)> + where + T: Hash + Eq + Clone + Ord, + A: Clone, + { + let x: Vec<(usize, usize, f64)> = node_indexes + .iter() + .flat_map(|node_index| { + self.get_successor_nodes_by_index(&node_index) + .into_iter() + .map(|adj| (*node_index, adj.node_index, adj.weight)) + .collect::>() + }) + .collect(); + x + } + /** Returns all the nodes that connect to `node_name`. From 35d9abbe56516800eac2213f2e80d676c6953cef Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Wed, 8 Jan 2025 13:55:56 -0500 Subject: [PATCH 4/9] wip --- .../community/leiden/aggregate_graph.rs | 315 ++++++++++++++++-- src/algorithms/community/leiden/mod.rs | 286 ++++++++++++---- src/algorithms/community/leiden/partition.rs | 105 +++++- src/graph/degree.rs | 2 +- 4 files changed, 614 insertions(+), 94 deletions(-) diff --git a/src/algorithms/community/leiden/aggregate_graph.rs b/src/algorithms/community/leiden/aggregate_graph.rs index 8c7336f..6c8219d 100644 --- a/src/algorithms/community/leiden/aggregate_graph.rs +++ b/src/algorithms/community/leiden/aggregate_graph.rs @@ -1,53 +1,75 @@ use super::Partition; -use crate::{Edge, Graph, Node}; -use nohash::IntSet; +use crate::{Edge, Graph, GraphSpecs, Node}; +use nohash::{IntMap, IntSet}; use std::fmt::Display; use std::hash::Hash; use std::sync::Arc; -pub(crate) struct AggregateGraph<'a, T, A> -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync + PartialOrd, - A: Clone + Send + Sync, -{ - pub graph: Graph, +pub(crate) struct AggregateGraph { + pub graph: Graph, pub node_nodes: Option>>, pub node_weights: Option>, - pub parent_graph: Option<&'a Graph>, - pub parent_partition: Option<&'a Partition>, + pub parent_graph: Option>, } -impl<'a, T, A> AggregateGraph<'a, T, A> -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync + PartialOrd, - A: Clone + Send + Sync, -{ - pub fn initial(graph: &Graph, weighted: bool) -> Self { - let nodes: Vec>> = graph - .get_all_node_names() +impl AggregateGraph { + pub fn initial(graph: &Graph, weighted: bool) -> Self + where + T: Hash + Eq + Clone + Ord + Display + Send + Sync + PartialOrd, + A: Clone + Send + Sync, + { + let nodes: Vec>> = (0..graph.number_of_nodes()) .into_iter() - .map(|name| Node::from_name(name.clone())) + .map(|node_index| Node::from_name_and_attributes(node_index, 1.0)) .collect(); - let edges: Vec>> = graph + let edges: Vec>> = graph .get_all_edges() .into_iter() - .map(|edge| match weighted { - true => edge.clone(), - false => Edge::with_weight(edge.u.clone(), edge.v.clone(), 1.0), + .map(|edge| { + let u = graph.get_node_index(&edge.u).unwrap(); + let v = graph.get_node_index(&edge.v).unwrap(); + let weight = match weighted { + true => edge.weight, + false => 1.0, + }; + Arc::new(Edge { + u, + v, + weight, + attributes: Some(f64::NAN), + }) }) .collect(); let weighted_graph = - Graph::::new_from_nodes_and_edges(nodes, edges, graph.specs.clone()).unwrap(); + Graph::::new_from_nodes_and_edges(nodes, edges, graph.specs.clone()) + .unwrap(); AggregateGraph { graph: weighted_graph, node_nodes: None, node_weights: None, parent_graph: None, - parent_partition: None, } } + pub fn find_original_graph(&self) -> &Graph { + match self.parent_graph { + Some(ref parent) => parent.find_original_graph(), + None => &self.graph, + } + } + + pub fn collect_nodes(&self, nodes: &IntSet) -> IntSet { + if self.parent_graph.is_none() { + return nodes.clone(); + } + let parent = self.parent_graph.as_ref().unwrap(); + nodes + .into_iter() + .flat_map(|node| parent.collect_nodes(&self.node_nodes.as_ref().unwrap()[*node])) + .collect() + } + pub fn node_total(&self, community: &IntSet) -> f64 { if self.node_weights.is_none() { return community.len() as f64; @@ -57,4 +79,247 @@ where .map(|node| self.node_weights.as_ref().unwrap()[*node]) .sum() } + + pub fn from_partition(self, partition: &Partition) -> AggregateGraph { + println!( + "self.graph.nodes {:?}", + self.graph + .get_all_nodes() + .into_iter() + .map(|n| (n.name, n.attributes.unwrap())) + .collect::>() + ); // MALCOLM + let node_nodes = partition.partition.iter().map(|c| c.clone()).collect(); + let node_weights: Vec = partition + .partition + .iter() + .map(|c| { + c.iter() + .map(|n| self.graph.get_node_by_index(n).unwrap().attributes.unwrap()) + .sum::() + }) + .collect(); + println!("node_weights {:?}", node_weights); + let new_nodes: Vec>> = partition + .partition + .iter() + .enumerate() + .map(|(i, _c)| Node::from_name_and_attributes(i, node_weights[i])) + .collect(); + let mut new_edge_weights = IntMap::>::default(); + self.graph.get_all_edges().into_iter().for_each(|edge| { + let mut u_com = partition.node_partition[edge.u]; + let mut v_com = partition.node_partition[edge.v]; + if u_com > v_com { + (u_com, v_com) = (v_com, u_com); + } + let weight = new_edge_weights + .entry(u_com) + .or_insert_with(IntMap::default) + .entry(v_com) + .or_insert(0.0); + *weight += edge.weight; + }); + let new_edges: Vec>> = new_edge_weights + .into_iter() + .flat_map(|(u_com, v_weights)| { + v_weights + .into_iter() + .map(move |(v_com, weight)| Edge::with_weight(u_com, v_com, weight)) + }) + .collect(); + let new_graph: Graph = Graph::new_from_nodes_and_edges( + new_nodes, + new_edges, + GraphSpecs { + directed: false, + self_loops: true, + ..self.graph.specs.clone() + }, + ) + .unwrap(); + for edge in new_graph.get_all_edges().iter() { + println!("{} {} {}", edge.u, edge.v, edge.weight); + } + AggregateGraph { + graph: new_graph, + node_nodes: Some(node_nodes), + node_weights: Some(node_weights), + parent_graph: Some(Box::new(self)), + } + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::GraphSpecs; + + #[test] + fn test_from_partition() { + let graph = get_graph(false); + let partition = Partition { + node_partition: vec![0, 0, 1, 1, 1], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0, 0.0, 0.0], + }; + let aggregate_graph = AggregateGraph::initial(&graph, true); + let aggregate_graph = aggregate_graph.from_partition(&partition); + assert_eq!(aggregate_graph.graph.number_of_nodes(), 2); + assert_eq!( + aggregate_graph.node_nodes, + Some(vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ]) + ); + assert_eq!(aggregate_graph.node_weights, Some(vec![2.0, 3.0])); + assert_eq!( + aggregate_graph + .parent_graph + .unwrap() + .graph + .number_of_nodes(), + 5 + ); + assert_eq!(aggregate_graph.graph.number_of_nodes(), 2); + assert_eq!(aggregate_graph.graph.number_of_edges(), 2); + assert_eq!(aggregate_graph.graph.get_edge(0, 1).unwrap().weight, 3.4); + assert_eq!(aggregate_graph.graph.get_edge(1, 1).unwrap().weight, 8.2); + } + + #[test] + fn test_find_original_graph() { + let graph = get_graph(false); + let aggregate_graph = AggregateGraph::initial(&graph, false); + let partition = Partition { + node_partition: vec![0, 0, 1, 1, 2], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0, 0.0, 0.0], + }; + let aggregate_graph = aggregate_graph.from_partition(&partition); + let partition = Partition { + node_partition: vec![0, 1, 2], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0], + }; + let aggregate_graph = aggregate_graph.from_partition(&partition); + let original_graph = aggregate_graph.find_original_graph(); + assert_eq!(original_graph.number_of_nodes(), 5); + } + + #[test] + fn test_collect_nodes_1() { + let graph = get_graph(false); + let aggregate_graph = AggregateGraph::initial(&graph, false); + let partition = Partition { + node_partition: vec![0, 0, 1, 1, 1], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0, 0.0, 0.0], + }; + let aggregate_graph = aggregate_graph.from_partition(&partition); + let nodes = vec![0].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![0, 1].into_iter().collect()); + + let nodes = vec![1].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![2, 3, 4].into_iter().collect()); + + let nodes = vec![0, 1].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![0, 1, 2, 3, 4].into_iter().collect()); + } + + #[test] + fn test_collect_nodes_2() { + let graph = get_graph(true); + let aggregate_graph = AggregateGraph::initial(&graph, false); + let partition = Partition { + node_partition: vec![0, 0, 1, 1, 1], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0, 0.0, 0.0], + }; + let aggregate_graph = aggregate_graph.from_partition(&partition); + + let nodes = vec![0].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![0, 1].into_iter().collect()); + + let nodes = vec![1].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![2, 3, 4].into_iter().collect()); + + let nodes = vec![0, 1].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![0, 1, 2, 3, 4].into_iter().collect()); + } + + #[test] + fn test_collect_nodes_3() { + let graph = get_graph(false); + let aggregate_graph = AggregateGraph::initial(&graph, false); + let partition = Partition { + node_partition: vec![0, 0, 1, 1, 2], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0, 0.0, 0.0], + }; + let aggregate_graph = aggregate_graph.from_partition(&partition); + let partition = Partition { + node_partition: vec![0, 1, 2], + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2].into_iter().collect(), + ], + degree_sums: vec![0.0, 0.0, 0.0], + }; + let aggregate_graph = aggregate_graph.from_partition(&partition); + + let nodes = vec![0].into_iter().collect(); + let result = aggregate_graph.collect_nodes(&nodes); + assert_eq!(result, vec![0, 1, 2, 3].into_iter().collect()); + } + + fn get_graph(directed: bool) -> Graph { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + ]; + let edges: Vec>> = vec![ + Edge::with_weight(0, 2, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(2, 3, 3.5), + Edge::with_weight(2, 4, 4.7), + ]; + let specs = if directed { + GraphSpecs::directed_create_missing() + } else { + GraphSpecs::undirected_create_missing() + }; + Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap() + } } diff --git a/src/algorithms/community/leiden/mod.rs b/src/algorithms/community/leiden/mod.rs index 2eb3630..ad777be 100644 --- a/src/algorithms/community/leiden/mod.rs +++ b/src/algorithms/community/leiden/mod.rs @@ -2,7 +2,11 @@ use crate::{ algorithms::community::partitions, algorithms::community::utility, algorithms::cuts::cut_size_by_indexes, ext::hashset::IntSetExt, Error, Graph, }; +use core::f64; +use itertools::Itertools; use nohash::IntSet; +use rand::distributions::WeightedIndex; +use rand::{distributions::Distribution, RngCore}; use std::collections::{HashSet, VecDeque}; use std::fmt::Debug; use std::fmt::Display; @@ -18,54 +22,83 @@ pub fn leiden( graph: &Graph, weighted: bool, resolution: Option, - omega: Option, + theta: Option, + gamma: Option, ) -> Result>, Error> where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, + T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, A: Clone + Send + Sync, { - let _resolution = resolution.unwrap_or(0.05); - let _omega = omega.unwrap_or(0.3); - let aggregate_graph = AggregateGraph::initial(graph, weighted); + let _resolution = resolution.unwrap_or(0.25); + let _theta = theta.unwrap_or(0.3); + let _gamma = gamma.unwrap_or(0.05); + let mut aggregate_graph = AggregateGraph::initial(graph, weighted); let mut partition = get_singleton_partition(graph, weighted); - // Ok(partitions::convert_usize_partitions_to_t(partition, &graph)) let mut prev_partition: Option = None; loop { - let new_partition = move_nodes_fast( + partition = move_nodes_fast( &aggregate_graph.graph, &mut partition, weighted, _resolution, ); - if partitions::partition_is_singleton(&new_partition.partition, graph.number_of_nodes()) + if partitions::partition_is_singleton(&partition.partition, graph.number_of_nodes()) || (prev_partition.is_some() && partitions::partitions_eq( - &new_partition.partition, + &partition.partition, &prev_partition.unwrap().partition, )) { + let flattened = partition.flatten(&aggregate_graph); return Ok(partitions::convert_usize_partitions_to_t( - new_partition.partition, + flattened.partition, &graph, )); } - prev_partition = Some(new_partition.clone()); + prev_partition = Some(partition.clone()); + println!("\n"); let refined_partition = - refine_partition(&aggregate_graph, &new_partition, _resolution, _omega); + refine_partition(&aggregate_graph, &partition, _resolution, _theta, _gamma); + println!("refined_partition {:?}", refined_partition); + println!("\n"); + aggregate_graph = aggregate_graph.from_partition(&refined_partition); + println!( + "aggregate_graph {:?}", + aggregate_graph + .graph + .get_all_nodes() + .into_iter() + .map(|n| (n.name, n.attributes.unwrap())) + .collect::>() + ); // MALCOLM + let partitions: Vec> = partition + .partition + .iter() + .map(|c| { + aggregate_graph + .node_nodes + .as_ref() + .unwrap() + .iter() + .enumerate() + .filter(|(_i, nodes)| nodes.is_subset(c)) + .map(|(i, _nodes)| i) + .collect() + }) + .collect(); + partition = Partition::from_partition(&aggregate_graph.graph, partitions); + println!("partition {:?}", partition); } } -fn move_nodes_fast( - graph: &Graph, +fn move_nodes_fast( + graph: &Graph, partition: &mut Partition, weighted: bool, resolution: f64, -) -> Partition -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ +) -> Partition { let mut queue: VecDeque = utility::get_shuffled_node_indexes(graph, None).into(); + // let mut queue: VecDeque = (0..graph.number_of_nodes()).collect::>().into(); while let Some(v) = queue.pop_front() { let empty = IntSet::default(); let adjacent_communities = get_adjacent_communities(v, graph, partition, &empty); @@ -77,6 +110,10 @@ where weighted, resolution, ); + println!( + "max_community: {:?} max_delta: {}", + max_community, max_delta + ); if max_delta > 0.0 { partition.move_node(v, &max_community, graph, weighted); let queue_set: IntSet = queue.iter().cloned().collect(); @@ -87,43 +124,43 @@ where } } } + println!("done move_nodes_fast"); + println!("{:?}", partition); partition.clone() } -fn refine_partition( - aggregate_graph: &AggregateGraph, +fn refine_partition( + aggregate_graph: &AggregateGraph, partition: &Partition, resolution: f64, - omega: f64, -) -> Partition -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ + theta: f64, + gamma: f64, +) -> Partition { let mut refined_partition = get_singleton_partition(&aggregate_graph.graph, true); + let mut rng: Box = Box::new(rand::thread_rng()); for community in partition.partition.iter() { - // merge_nodes_subset( - // &refined_partition, - // &community, - // graph, - // weighted, - // resolution, - // omega, - // ); + merge_nodes_subset( + &mut refined_partition, + &community, + aggregate_graph, + resolution, + theta, + gamma, + &mut rng, + ); } refined_partition } -fn merge_nodes_subset( +fn merge_nodes_subset( partition: &mut Partition, community: &IntSet, - aggregate_graph: &AggregateGraph, + aggregate_graph: &AggregateGraph, resolution: f64, - omega: f64, -) where - T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, - A: Clone + Send + Sync, -{ + theta: f64, + gamma: f64, + rng: &mut Box, +) { let size_s = aggregate_graph.node_total(community); let R: IntSet = community .iter() @@ -133,16 +170,19 @@ fn merge_nodes_subset( let x = cut_size_by_indexes(&aggregate_graph.graph, &[*v], &community_without_v, true); let v_set = vec![*v].into_iter().collect(); let v_node_total = aggregate_graph.node_total(&v_set); - x >= resolution * v_node_total * (size_s - v_node_total) + x >= gamma * v_node_total * (size_s - v_node_total) }) .collect(); - for v in R { + println!("R: {:?}", R); + for v in R.into_iter().sorted() { if partition.node_community(v).len() != 1 { continue; } - let T = partition + println!("v: {:?}", v); + let T: Vec> = partition .partition - .into_iter() + .iter() + .cloned() .filter(|C| { let nbunch1: Vec = C.iter().map(|n| n.clone()).collect(); let nbunch2: Vec = (community - C).iter().map(|n| n.clone()).collect(); @@ -152,9 +192,31 @@ fn merge_nodes_subset( nbunch2.as_slice(), true, ); - if C.is_subset(community) {} + let C_node_total = aggregate_graph.node_total(C); + C.is_subset(community) && cs >= gamma * C_node_total * (size_s - C_node_total) + }) + .collect(); + println!(" T: {:?}", T); + let mut communities: Vec<(&IntSet, f64)> = T + .iter() + .map(|C| { + ( + C, + get_delta(v, partition, C, &aggregate_graph.graph, true, resolution), + ) }) + .filter(|(_C, delta)| *delta >= 0.0) .collect(); + let weights: Vec = communities + .iter() + .map(|(_C, delta)| (delta / theta).exp()) + .collect(); + let dist = WeightedIndex::new(&weights).unwrap(); + let new_community = communities[dist.sample(rng)]; + // communities.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); // MALCOLM + // let new_community = communities.last().unwrap(); // MALCOLM + println!(" new_community: {:?}", new_community); + partition.move_node(v, new_community.0, &aggregate_graph.graph, true); } } @@ -191,12 +253,26 @@ where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { - let mut adjacent_communities: Vec<&IntSet> = vec![]; - adjacent_communities.push(&partition.partition[partition.node_partition[node]]); + let mut adjacent_community_ids: IntSet = IntSet::default(); + // let mut adjacent_communities: Vec<&IntSet> = vec![]; + adjacent_community_ids.insert(partition.node_partition[node]); for u in graph.get_successor_nodes_by_index(&node) { - adjacent_communities.push(&partition.partition[partition.node_partition[u.node_index]]); + adjacent_community_ids.insert(partition.node_partition[u.node_index]); } + // if node == 2 { + // println!("{:?}", partition); + // println!("{:?}", adjacent_community_ids); + // } // MALCOLM + let mut adjacent_communities: Vec<&IntSet> = adjacent_community_ids + .into_iter() + .map(|i| &partition.partition[i]) + .collect(); adjacent_communities.push(&empty); + // println!( + // "adjacent_communities for {}: {:?}", + // node, + // adjacent_communities.len() + // ); // MALCOLM adjacent_communities } @@ -212,14 +288,12 @@ where T: Hash + Eq + Clone + Ord + Display + Send + Sync, A: Clone + Send + Sync, { - let mut idx = 0; - let mut opt: IntSet = communities[idx].iter().cloned().collect(); + let mut opt: IntSet = communities[0].iter().cloned().collect(); let mut val = get_delta(v, partition, &opt, graph, weighted, resolution); for k in 1..communities.len() { let optk = &communities[k]; let valk = get_delta(v, partition, optk, graph, weighted, resolution); if valk > val { - idx = k; opt = optk.iter().cloned().collect(); val = valk; } @@ -257,12 +331,16 @@ where false => partition.degree_sum(*target.into_iter().next().unwrap()), }; - ((diff_target - diff_source) + let delta = ((diff_target - diff_source) - resolution / (2.0 * m) * (deg_v.powf(2.0) + deg_v * (degs_target - degs_source))) - / m -} + / m; -// fn aggregate_graph(graph: &Graph) + // MALCOLM + // println!("partition: {:?}", partition); + // println!("target: {:?}", target); + // println!("delta | v: {} target: {:?} delta: {}", v, target, delta); + delta +} fn single_node_neighbor_cut_size( graph: &Graph, @@ -291,7 +369,8 @@ mod tests { use super::*; use crate::{Edge, Graph, GraphSpecs, Node}; use assert_approx_eq::assert_approx_eq; - use sprs::vec; + use rand::SeedableRng; + use rand_chacha::ChaCha20Rng; use std::sync::Arc; #[test] @@ -475,15 +554,102 @@ mod tests { assert!(partition.degree_sums == vec![-1.1, 1.1, 0.0, 0.0]); } - fn get_graph_for_argmax(directed: bool) -> Graph { + #[test] + fn test_merge_nodes_subset_1() { + let (mut partition, community, aggregate_graph) = get_params_for_merge_nodes_subset(); + let mut rng: Box = Box::new(ChaCha20Rng::seed_from_u64(1)); + merge_nodes_subset( + &mut partition, + &community, + &aggregate_graph, + 0.25, + 0.3, + 0.05, + &mut rng, + ); + assert_eq!(partition.node_partition, vec![1, 0, 1, 2, 2, 2]); + assert_eq!( + partition.partition, + vec![ + vec![1].into_iter().collect(), + vec![0, 2].into_iter().collect(), + vec![3, 4, 5].into_iter().collect(), + ] + ); + assert_eq!(partition.degree_sums, vec![3.3, 12.3, 20.5]); + } + + #[test] + fn test_merge_nodes_subset_2() { + let (mut partition, community, aggregate_graph) = get_params_for_merge_nodes_subset(); + let mut rng: Box = Box::new(ChaCha20Rng::seed_from_u64(4)); + merge_nodes_subset( + &mut partition, + &community, + &aggregate_graph, + 0.25, + 0.3, + 0.05, + &mut rng, + ); + assert_eq!(partition.node_partition, vec![0, 0, 0, 1, 2, 2]); + assert_eq!( + partition.partition, + vec![ + vec![0, 1, 2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4, 5].into_iter().collect(), + ] + ); + assert_eq!(partition.degree_sums, vec![15.600000000000001, 6.2, 12.6]); + } + + fn get_params_for_merge_nodes_subset<'a>() -> (Partition, IntSet, AggregateGraph) { let nodes = vec![ Node::from_name(0), Node::from_name(1), Node::from_name(2), Node::from_name(3), Node::from_name(4), + Node::from_name(5), ]; let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.2), + Edge::with_weight(0, 2, 3.7), + Edge::with_weight(2, 3, 1.7), + Edge::with_weight(3, 4, 2.1), + Edge::with_weight(4, 5, 3.2), + Edge::with_weight(3, 5, 4.1), + ]; + let graph = + Graph::new_from_nodes_and_edges(nodes, edges, GraphSpecs::undirected()).unwrap(); + let partition = Partition { + partition: vec![ + vec![0].into_iter().collect(), + vec![1].into_iter().collect(), + vec![2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + vec![5].into_iter().collect(), + ], + node_partition: vec![0, 1, 2, 3, 4, 5], + degree_sums: vec![4.8, 3.3, 7.5, 6.2, 5.3, 7.3], + }; + let community = vec![0, 1, 2, 3, 4, 5].into_iter().collect(); + let aggregate_graph = AggregateGraph::initial(&graph, true); + (partition, community, aggregate_graph) + } + + fn get_graph_for_argmax(directed: bool) -> Graph { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + ]; + let edges: Vec>> = vec![ Edge::with_weight(0, 2, 1.1), Edge::with_weight(1, 2, 2.3), Edge::with_weight(2, 3, 3.5), @@ -512,7 +678,7 @@ mod tests { fn get_communities_for_argmax<'a>( partition: &'a Partition, - graph: &Graph, + graph: &Graph, empty: &'a IntSet, ) -> Vec<&'a IntSet> { get_adjacent_communities(0, &graph, &partition, empty) diff --git a/src/algorithms/community/leiden/partition.rs b/src/algorithms/community/leiden/partition.rs index c0ac9f8..4021c7b 100644 --- a/src/algorithms/community/leiden/partition.rs +++ b/src/algorithms/community/leiden/partition.rs @@ -1,7 +1,8 @@ use crate::Graph; +use itertools::Itertools; use nohash::IntSet; -use std::fmt::Display; -use std::hash::Hash; + +use super::AggregateGraph; #[derive(Debug, Clone)] pub(crate) struct Partition { @@ -19,16 +20,13 @@ impl Partition { self.degree_sums[self.node_partition[node]] } - pub fn move_node( + pub fn move_node( &mut self, v: usize, target: &IntSet, - graph: &Graph, + graph: &Graph, weighted: bool, - ) where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, - { + ) { let source_partition_idx = self.node_partition[v]; let target_partition_idx: usize; if target.len() > 0 { @@ -71,4 +69,95 @@ impl Partition { .collect(); } } + + pub fn from_partition(graph: &Graph, partition: Vec>) -> Partition { + println!("degrees: {:?}", graph.get_weighted_degree_for_all_nodes()); + let node_partition: Vec = partition + .iter() + .enumerate() + .flat_map(|(i, c)| c.iter().map(move |n| (*n, i))) + .sorted() + .map(|(_n, i)| i) + .collect(); + let degree_sums: Vec = partition + .iter() + .map(|c| { + c.iter() + .map(|n| graph.get_node_weighted_degree_by_index(*n)) + .sum() + }) + .collect(); + Partition { + node_partition, + partition, + degree_sums, + } + } + + pub fn flatten(self, aggregate_graph: &AggregateGraph) -> Self { + if aggregate_graph.parent_graph.is_none() { + return self; + } + let graph = aggregate_graph.find_original_graph(); + let partitions = self + .partition + .iter() + .map(|p| aggregate_graph.collect_nodes(p)) + .collect(); + Partition::from_partition(graph, partitions) + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::{Edge, Graph, GraphSpecs, Node}; + use std::sync::Arc; + + #[test] + fn test_from_partition_1() { + let graph = get_graph(false); + let partition = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ]; + let new_partition = Partition::from_partition(&graph, partition); + assert_eq!(new_partition.node_partition, vec![0, 0, 1, 1, 1]); + assert_eq!(new_partition.degree_sums, vec![3.4, 19.8]); + } + + #[test] + fn test_from_partition_2() { + let graph = get_graph(true); + let partition = vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ]; + let new_partition = Partition::from_partition(&graph, partition); + assert_eq!(new_partition.node_partition, vec![0, 0, 1, 1, 1]); + assert_eq!(new_partition.degree_sums, vec![3.4, 19.799999999999997]); + } + + fn get_graph(directed: bool) -> Graph { + let nodes = vec![ + Node::from_name_and_attributes(0, f64::NAN), + Node::from_name_and_attributes(1, f64::NAN), + Node::from_name_and_attributes(2, f64::NAN), + Node::from_name_and_attributes(3, f64::NAN), + Node::from_name_and_attributes(4, f64::NAN), + ]; + let edges: Vec>> = vec![ + Edge::with_weight(0, 2, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(2, 3, 3.5), + Edge::with_weight(2, 4, 4.7), + ]; + let specs = if directed { + GraphSpecs::directed_create_missing() + } else { + GraphSpecs::undirected_create_missing() + }; + Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap() + } } diff --git a/src/graph/degree.rs b/src/graph/degree.rs index b670ef4..bc1f24d 100644 --- a/src/graph/degree.rs +++ b/src/graph/degree.rs @@ -1,5 +1,5 @@ use super::Graph; -use crate::{AdjacentNode, Error, ErrorKind}; +use crate::{Error, ErrorKind}; use std::collections::HashMap; use std::fmt::Display; use std::hash::Hash; From 493464937bfc71de06e6609e26c6c133b3b8df8d Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Wed, 8 Jan 2025 16:11:32 -0500 Subject: [PATCH 5/9] wip --- .../community/leiden/aggregate_graph.rs | 24 +- src/algorithms/community/leiden/mod.rs | 253 ++++++++++++++---- src/algorithms/community/leiden/partition.rs | 6 +- src/generators/random.rs | 2 +- 4 files changed, 221 insertions(+), 64 deletions(-) diff --git a/src/algorithms/community/leiden/aggregate_graph.rs b/src/algorithms/community/leiden/aggregate_graph.rs index 6c8219d..f6176a9 100644 --- a/src/algorithms/community/leiden/aggregate_graph.rs +++ b/src/algorithms/community/leiden/aggregate_graph.rs @@ -81,14 +81,14 @@ impl AggregateGraph { } pub fn from_partition(self, partition: &Partition) -> AggregateGraph { - println!( - "self.graph.nodes {:?}", - self.graph - .get_all_nodes() - .into_iter() - .map(|n| (n.name, n.attributes.unwrap())) - .collect::>() - ); // MALCOLM + // println!( + // "self.graph.nodes {:?}", + // self.graph + // .get_all_nodes() + // .into_iter() + // .map(|n| (n.name, n.attributes.unwrap())) + // .collect::>() + // ); // MALCOLM let node_nodes = partition.partition.iter().map(|c| c.clone()).collect(); let node_weights: Vec = partition .partition @@ -99,7 +99,7 @@ impl AggregateGraph { .sum::() }) .collect(); - println!("node_weights {:?}", node_weights); + // println!("node_weights {:?}", node_weights); let new_nodes: Vec>> = partition .partition .iter() @@ -138,9 +138,9 @@ impl AggregateGraph { }, ) .unwrap(); - for edge in new_graph.get_all_edges().iter() { - println!("{} {} {}", edge.u, edge.v, edge.weight); - } + // for edge in new_graph.get_all_edges().iter() { + // println!("{} {} {}", edge.u, edge.v, edge.weight); + // } AggregateGraph { graph: new_graph, node_nodes: Some(node_nodes), diff --git a/src/algorithms/community/leiden/mod.rs b/src/algorithms/community/leiden/mod.rs index ad777be..ec09e06 100644 --- a/src/algorithms/community/leiden/mod.rs +++ b/src/algorithms/community/leiden/mod.rs @@ -18,9 +18,15 @@ use partition::Partition; mod aggregate_graph; use aggregate_graph::AggregateGraph; +pub enum QualityFunction { + Modularity, + CPM, +} + pub fn leiden( graph: &Graph, weighted: bool, + quality_function: QualityFunction, resolution: Option, theta: Option, gamma: Option, @@ -40,6 +46,7 @@ where &aggregate_graph.graph, &mut partition, weighted, + &quality_function, _resolution, ); if partitions::partition_is_singleton(&partition.partition, graph.number_of_nodes()) @@ -56,21 +63,27 @@ where )); } prev_partition = Some(partition.clone()); - println!("\n"); - let refined_partition = - refine_partition(&aggregate_graph, &partition, _resolution, _theta, _gamma); - println!("refined_partition {:?}", refined_partition); - println!("\n"); + // println!("\n"); + let refined_partition = refine_partition( + &aggregate_graph, + &partition, + &quality_function, + _resolution, + _theta, + _gamma, + ); + // println!("refined_partition {:?}", refined_partition); + // println!("\n"); aggregate_graph = aggregate_graph.from_partition(&refined_partition); - println!( - "aggregate_graph {:?}", - aggregate_graph - .graph - .get_all_nodes() - .into_iter() - .map(|n| (n.name, n.attributes.unwrap())) - .collect::>() - ); // MALCOLM + // println!( + // "aggregate_graph {:?}", + // aggregate_graph + // .graph + // .get_all_nodes() + // .into_iter() + // .map(|n| (n.name, n.attributes.unwrap())) + // .collect::>() + // ); // MALCOLM let partitions: Vec> = partition .partition .iter() @@ -87,7 +100,7 @@ where }) .collect(); partition = Partition::from_partition(&aggregate_graph.graph, partitions); - println!("partition {:?}", partition); + // println!("partition {:?}", partition); } } @@ -95,11 +108,13 @@ fn move_nodes_fast( graph: &Graph, partition: &mut Partition, weighted: bool, + quality_function: &QualityFunction, resolution: f64, ) -> Partition { let mut queue: VecDeque = utility::get_shuffled_node_indexes(graph, None).into(); // let mut queue: VecDeque = (0..graph.number_of_nodes()).collect::>().into(); while let Some(v) = queue.pop_front() { + println!("v: {:?}", v); let empty = IntSet::default(); let adjacent_communities = get_adjacent_communities(v, graph, partition, &empty); let (max_community, max_delta) = argmax( @@ -108,12 +123,13 @@ fn move_nodes_fast( &adjacent_communities, graph, weighted, + &quality_function, resolution, ); - println!( - "max_community: {:?} max_delta: {}", - max_community, max_delta - ); + // println!( + // "max_community: {:?} max_delta: {}", + // max_community, max_delta + // ); if max_delta > 0.0 { partition.move_node(v, &max_community, graph, weighted); let queue_set: IntSet = queue.iter().cloned().collect(); @@ -124,14 +140,15 @@ fn move_nodes_fast( } } } - println!("done move_nodes_fast"); - println!("{:?}", partition); + // println!("done move_nodes_fast"); + // println!("{:?}", partition); partition.clone() } fn refine_partition( aggregate_graph: &AggregateGraph, partition: &Partition, + quality_function: &QualityFunction, resolution: f64, theta: f64, gamma: f64, @@ -143,6 +160,7 @@ fn refine_partition( &mut refined_partition, &community, aggregate_graph, + quality_function, resolution, theta, gamma, @@ -156,6 +174,7 @@ fn merge_nodes_subset( partition: &mut Partition, community: &IntSet, aggregate_graph: &AggregateGraph, + quality_function: &QualityFunction, resolution: f64, theta: f64, gamma: f64, @@ -173,12 +192,12 @@ fn merge_nodes_subset( x >= gamma * v_node_total * (size_s - v_node_total) }) .collect(); - println!("R: {:?}", R); + // println!("R: {:?}", R); for v in R.into_iter().sorted() { if partition.node_community(v).len() != 1 { continue; } - println!("v: {:?}", v); + // println!("v: {:?}", v); let T: Vec> = partition .partition .iter() @@ -196,13 +215,21 @@ fn merge_nodes_subset( C.is_subset(community) && cs >= gamma * C_node_total * (size_s - C_node_total) }) .collect(); - println!(" T: {:?}", T); + // println!(" T: {:?}", T); let mut communities: Vec<(&IntSet, f64)> = T .iter() .map(|C| { ( C, - get_delta(v, partition, C, &aggregate_graph.graph, true, resolution), + get_delta( + v, + partition, + C, + &aggregate_graph.graph, + true, + &quality_function, + resolution, + ), ) }) .filter(|(_C, delta)| *delta >= 0.0) @@ -215,7 +242,7 @@ fn merge_nodes_subset( let new_community = communities[dist.sample(rng)]; // communities.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); // MALCOLM // let new_community = communities.last().unwrap(); // MALCOLM - println!(" new_community: {:?}", new_community); + // println!(" new_community: {:?}", new_community); partition.move_node(v, new_community.0, &aggregate_graph.graph, true); } } @@ -276,23 +303,36 @@ where adjacent_communities } -fn argmax( +fn argmax( v: usize, partition: &Partition, communities: &[&IntSet], - graph: &Graph, + graph: &Graph, weighted: bool, + quality_function: &QualityFunction, resolution: f64, -) -> (IntSet, f64) -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ +) -> (IntSet, f64) { let mut opt: IntSet = communities[0].iter().cloned().collect(); - let mut val = get_delta(v, partition, &opt, graph, weighted, resolution); + let mut val = get_delta( + v, + partition, + &opt, + graph, + weighted, + &quality_function, + resolution, + ); for k in 1..communities.len() { let optk = &communities[k]; - let valk = get_delta(v, partition, optk, graph, weighted, resolution); + let valk = get_delta( + v, + partition, + optk, + graph, + weighted, + &quality_function, + resolution, + ); if valk > val { opt = optk.iter().cloned().collect(); val = valk; @@ -301,18 +341,31 @@ where (opt, val) } -fn get_delta( +fn get_delta( v: usize, partition: &Partition, target: &IntSet, - graph: &Graph, + graph: &Graph, weighted: bool, + quality_function: &QualityFunction, resolution: f64, -) -> f64 -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ +) -> f64 { + match quality_function { + QualityFunction::Modularity => { + get_delta_modularity(v, partition, target, graph, weighted, resolution) + } + QualityFunction::CPM => get_delta_cpm(v, partition, target, graph, weighted, resolution), + } +} + +fn get_delta_modularity( + v: usize, + partition: &Partition, + target: &IntSet, + graph: &Graph, + weighted: bool, + resolution: f64, +) -> f64 { if target.contains(&v) { return 0.0; } @@ -342,6 +395,56 @@ where delta } +fn get_delta_cpm( + v: usize, + partition: &Partition, + target: &IntSet, + graph: &Graph, + weighted: bool, + resolution: f64, +) -> f64 { + if target.contains(&v) { + return 0.0; + } + let m = graph.size(weighted); + let source_community = partition.node_community(v); + let diff_source = + single_node_neighbor_cut_size(graph, v, &source_community.without(&v), weighted); + let diff_target = single_node_neighbor_cut_size(graph, v, &target, weighted); + + let node_weights = graph + .get_all_nodes() + .into_iter() + .map(|n| n.attributes.unwrap()) + .collect::>(); + let v_weight = node_weights[v]; + let source_weight = source_community + .iter() + .map(|n| node_weights[*n]) + .sum::(); + let target_weight = target.iter().map(|n| node_weights[*n]).sum::(); + + // let deg_v = match weighted { + // true => graph.get_node_weighted_degree_by_index(v), + // false => graph.get_node_degree_by_index(v) as f64, + // }; + // let degs_source = partition.degree_sum(v); + // let degs_target = match target.len() == 0 { + // true => 0.0, + // false => partition.degree_sum(*target.into_iter().next().unwrap()), + // }; + + let delta = diff_target + - diff_source + - resolution * v_weight * (v_weight + target_weight - source_weight); + + // MALCOLM + // println!("partition: {:?}", partition); + // println!("target: {:?}", target); + // println!("delta | v: {} target: {:?} delta: {}", v, target, delta); + delta +} + fn single_node_neighbor_cut_size( graph: &Graph, v: usize, @@ -413,7 +516,7 @@ mod tests { #[test] fn test_get_delta_1() { - let edges: Vec>> = vec![ + let edges: Vec>> = vec![ Edge::with_weight(0, 1, 1.1), Edge::with_weight(1, 2, 2.3), Edge::with_weight(1, 3, 3.5), @@ -430,13 +533,21 @@ mod tests { degree_sums: vec![12.0, 24.0], }; let target = vec![2, 3, 4].into_iter().collect(); - let result = get_delta(1, &partition, &target, &graph, true, 1.0); + let result = get_delta( + 1, + &partition, + &target, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); assert_approx_eq!(result, -0.11206896551724145); } #[test] fn test_get_delta_2() { - let edges: Vec>> = vec![ + let edges: Vec>> = vec![ Edge::with_weight(0, 1, 1.1), Edge::with_weight(1, 2, 2.3), Edge::with_weight(1, 3, 3.5), @@ -453,7 +564,15 @@ mod tests { degree_sums: vec![12.0, 24.0], }; let target = vec![2, 3, 4].into_iter().collect(); - let result = get_delta(1, &partition, &target, &graph, true, 1.0); + let result = get_delta( + 1, + &partition, + &target, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); assert_approx_eq!(result, -0.20689655172413812); } @@ -508,11 +627,27 @@ mod tests { let partition = get_partition_for_argmax(); let empty = IntSet::default(); let communities = get_communities_for_argmax(&partition, &graph, &empty); - let result = argmax(0, &partition, &communities, &graph, true, 1.0); + let result = argmax( + 0, + &partition, + &communities, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); assert_eq!(result.0.len(), 1); assert!(result.0.contains(&2)); assert_approx_eq!(result.1, 0.09033145065398336); - let result = argmax(0, &partition, &communities, &graph, false, 1.0); + let result = argmax( + 0, + &partition, + &communities, + &graph, + false, + &QualityFunction::Modularity, + 1.0, + ); assert_eq!(result.0.len(), 1); assert!(result.0.contains(&2)); assert_approx_eq!(result.1, 0.21875); @@ -524,11 +659,27 @@ mod tests { let partition = get_partition_for_argmax(); let empty = IntSet::default(); let communities = get_communities_for_argmax(&partition, &graph, &empty); - let result = argmax(0, &partition, &communities, &graph, true, 1.0); + let result = argmax( + 0, + &partition, + &communities, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); assert_eq!(result.0.len(), 1); assert!(result.0.contains(&2)); assert_approx_eq!(result.1, 0.09033145065398336); - let result = argmax(0, &partition, &communities, &graph, false, 1.0); + let result = argmax( + 0, + &partition, + &communities, + &graph, + false, + &QualityFunction::Modularity, + 1.0, + ); assert_eq!(result.0.len(), 1); assert!(result.0.contains(&2)); assert_approx_eq!(result.1, 0.21875); @@ -562,6 +713,7 @@ mod tests { &mut partition, &community, &aggregate_graph, + &QualityFunction::Modularity, 0.25, 0.3, 0.05, @@ -587,6 +739,7 @@ mod tests { &mut partition, &community, &aggregate_graph, + &QualityFunction::Modularity, 0.25, 0.3, 0.05, diff --git a/src/algorithms/community/leiden/partition.rs b/src/algorithms/community/leiden/partition.rs index 4021c7b..5c2e97b 100644 --- a/src/algorithms/community/leiden/partition.rs +++ b/src/algorithms/community/leiden/partition.rs @@ -34,9 +34,13 @@ impl Partition { target_partition_idx = self.node_partition[*el]; } else { target_partition_idx = self.partition.len(); + self.partition.push(target.clone()); self.degree_sums.push(0.0); } + // println!("source_partition_idx: {:?}", source_partition_idx); + // println!("target_partition_idx: {:?}", target_partition_idx); + // Remove `v` from its old community and place it into the target partition self.partition[source_partition_idx].remove(&v); self.partition[target_partition_idx].insert(v); @@ -71,7 +75,7 @@ impl Partition { } pub fn from_partition(graph: &Graph, partition: Vec>) -> Partition { - println!("degrees: {:?}", graph.get_weighted_degree_for_all_nodes()); + // println!("degrees: {:?}", graph.get_weighted_degree_for_all_nodes()); let node_partition: Vec = partition .iter() .enumerate() diff --git a/src/generators/random.rs b/src/generators/random.rs index 5616c6b..8cec82a 100644 --- a/src/generators/random.rs +++ b/src/generators/random.rs @@ -94,7 +94,7 @@ fn fast_gnp_random_graph_undirected( let lr: f64 = (1.0_f64 - rng.gen::()).ln(); w = w + 1 + ((lr / lp) as i32); while w >= v && v < num_nodes { - w += v; + w -= v; v += 1; } if v < num_nodes { From 625219dfe7cb997d88a1812e69cc1c801e578a38 Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Wed, 8 Jan 2025 17:07:20 -0500 Subject: [PATCH 6/9] wip --- .../community/leiden/aggregate_graph.rs | 12 - src/algorithms/community/leiden/mod.rs | 509 ++---------------- src/algorithms/community/leiden/partition.rs | 68 ++- src/algorithms/community/leiden/quality.rs | 366 +++++++++++++ 4 files changed, 478 insertions(+), 477 deletions(-) create mode 100644 src/algorithms/community/leiden/quality.rs diff --git a/src/algorithms/community/leiden/aggregate_graph.rs b/src/algorithms/community/leiden/aggregate_graph.rs index f6176a9..243b167 100644 --- a/src/algorithms/community/leiden/aggregate_graph.rs +++ b/src/algorithms/community/leiden/aggregate_graph.rs @@ -81,14 +81,6 @@ impl AggregateGraph { } pub fn from_partition(self, partition: &Partition) -> AggregateGraph { - // println!( - // "self.graph.nodes {:?}", - // self.graph - // .get_all_nodes() - // .into_iter() - // .map(|n| (n.name, n.attributes.unwrap())) - // .collect::>() - // ); // MALCOLM let node_nodes = partition.partition.iter().map(|c| c.clone()).collect(); let node_weights: Vec = partition .partition @@ -99,7 +91,6 @@ impl AggregateGraph { .sum::() }) .collect(); - // println!("node_weights {:?}", node_weights); let new_nodes: Vec>> = partition .partition .iter() @@ -138,9 +129,6 @@ impl AggregateGraph { }, ) .unwrap(); - // for edge in new_graph.get_all_edges().iter() { - // println!("{} {} {}", edge.u, edge.v, edge.weight); - // } AggregateGraph { graph: new_graph, node_nodes: Some(node_nodes), diff --git a/src/algorithms/community/leiden/mod.rs b/src/algorithms/community/leiden/mod.rs index ec09e06..f8665ca 100644 --- a/src/algorithms/community/leiden/mod.rs +++ b/src/algorithms/community/leiden/mod.rs @@ -1,6 +1,6 @@ use crate::{ algorithms::community::partitions, algorithms::community::utility, - algorithms::cuts::cut_size_by_indexes, ext::hashset::IntSetExt, Error, Graph, + algorithms::cuts::cut_size_by_indexes, ext::hashset::IntSetExt, Error, ErrorKind, Graph, }; use core::f64; use itertools::Itertools; @@ -18,11 +18,30 @@ use partition::Partition; mod aggregate_graph; use aggregate_graph::AggregateGraph; -pub enum QualityFunction { - Modularity, - CPM, -} +mod quality; +pub use quality::QualityFunction; +use quality::{argmax, get_delta}; + +/** +Returns the best partition of a graph, using the Leiden algorithm. + +The Leiden algorithm is considered better than the Louvain algorithm, +as it is more accurate and faster. See the paper "From Louvain to Leiden: +guaranteeing well-connected communities" by V.A. Traag, L. Waltman and N.J. van Eck. + +# Arguments + +* `graph`: a [Graph](../../../struct.Graph.html) instance +* `weighted`: set to `true` to use edge weights when determining communities +* `quality_function`: the quality function to use, either modularity or Constant Potts Model (CPM) +* `resolution`: larger values result in smaller communities; default 0.25 +* `theta`: the θ parameter of the Leiden method, which determines the randomness in the refinement phase of the Leiden algorithm; default 0.3 +* `gamma`: the γ parameter of the Leiden method, which also controls the granularity of the communities; default 0.05 + +# Examples + +*/ pub fn leiden( graph: &Graph, weighted: bool, @@ -35,6 +54,14 @@ where T: Hash + Eq + Clone + Ord + Debug + Display + Send + Sync, A: Clone + Send + Sync, { + if graph.specs.directed { + return Err(Error { + kind: ErrorKind::WrongMethod, + message: "The Leiden algorithm does not supported drected graphs. \ + Consider using the `to_undirected` method to convert your graph." + .to_string(), + }); + } let _resolution = resolution.unwrap_or(0.25); let _theta = theta.unwrap_or(0.3); let _gamma = gamma.unwrap_or(0.05); @@ -63,7 +90,6 @@ where )); } prev_partition = Some(partition.clone()); - // println!("\n"); let refined_partition = refine_partition( &aggregate_graph, &partition, @@ -72,18 +98,7 @@ where _theta, _gamma, ); - // println!("refined_partition {:?}", refined_partition); - // println!("\n"); aggregate_graph = aggregate_graph.from_partition(&refined_partition); - // println!( - // "aggregate_graph {:?}", - // aggregate_graph - // .graph - // .get_all_nodes() - // .into_iter() - // .map(|n| (n.name, n.attributes.unwrap())) - // .collect::>() - // ); // MALCOLM let partitions: Vec> = partition .partition .iter() @@ -100,7 +115,6 @@ where }) .collect(); partition = Partition::from_partition(&aggregate_graph.graph, partitions); - // println!("partition {:?}", partition); } } @@ -112,11 +126,9 @@ fn move_nodes_fast( resolution: f64, ) -> Partition { let mut queue: VecDeque = utility::get_shuffled_node_indexes(graph, None).into(); - // let mut queue: VecDeque = (0..graph.number_of_nodes()).collect::>().into(); while let Some(v) = queue.pop_front() { - println!("v: {:?}", v); let empty = IntSet::default(); - let adjacent_communities = get_adjacent_communities(v, graph, partition, &empty); + let adjacent_communities = partition.get_adjacent_communities(v, graph, &empty); let (max_community, max_delta) = argmax( v, partition, @@ -126,10 +138,6 @@ fn move_nodes_fast( &quality_function, resolution, ); - // println!( - // "max_community: {:?} max_delta: {}", - // max_community, max_delta - // ); if max_delta > 0.0 { partition.move_node(v, &max_community, graph, weighted); let queue_set: IntSet = queue.iter().cloned().collect(); @@ -140,8 +148,6 @@ fn move_nodes_fast( } } } - // println!("done move_nodes_fast"); - // println!("{:?}", partition); partition.clone() } @@ -181,7 +187,7 @@ fn merge_nodes_subset( rng: &mut Box, ) { let size_s = aggregate_graph.node_total(community); - let R: IntSet = community + let communities_of_size: IntSet = community .iter() .map(|v| v.clone()) .filter(|v| { @@ -192,39 +198,37 @@ fn merge_nodes_subset( x >= gamma * v_node_total * (size_s - v_node_total) }) .collect(); - // println!("R: {:?}", R); - for v in R.into_iter().sorted() { + for v in communities_of_size.into_iter().sorted() { if partition.node_community(v).len() != 1 { continue; } - // println!("v: {:?}", v); - let T: Vec> = partition + let filtered: Vec> = partition .partition .iter() .cloned() - .filter(|C| { - let nbunch1: Vec = C.iter().map(|n| n.clone()).collect(); - let nbunch2: Vec = (community - C).iter().map(|n| n.clone()).collect(); + .filter(|part| { + let nbunch1: Vec = part.iter().map(|n| n.clone()).collect(); + let nbunch2: Vec = (community - part).iter().map(|n| n.clone()).collect(); let cs = cut_size_by_indexes( &aggregate_graph.graph, nbunch1.as_slice(), nbunch2.as_slice(), true, ); - let C_node_total = aggregate_graph.node_total(C); - C.is_subset(community) && cs >= gamma * C_node_total * (size_s - C_node_total) + let part_node_total = aggregate_graph.node_total(part); + part.is_subset(community) + && cs >= gamma * part_node_total * (size_s - part_node_total) }) .collect(); - // println!(" T: {:?}", T); - let mut communities: Vec<(&IntSet, f64)> = T + let communities: Vec<(&IntSet, f64)> = filtered .iter() - .map(|C| { + .map(|fc| { ( - C, + fc, get_delta( v, partition, - C, + fc, &aggregate_graph.graph, true, &quality_function, @@ -232,17 +236,14 @@ fn merge_nodes_subset( ), ) }) - .filter(|(_C, delta)| *delta >= 0.0) + .filter(|(_fc, delta)| *delta >= 0.0) .collect(); let weights: Vec = communities .iter() - .map(|(_C, delta)| (delta / theta).exp()) + .map(|(_fc, delta)| (delta / theta).exp()) .collect(); let dist = WeightedIndex::new(&weights).unwrap(); let new_community = communities[dist.sample(rng)]; - // communities.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap()); // MALCOLM - // let new_community = communities.last().unwrap(); // MALCOLM - // println!(" new_community: {:?}", new_community); partition.move_node(v, new_community.0, &aggregate_graph.graph, true); } } @@ -270,421 +271,15 @@ where } } -fn get_adjacent_communities<'a, T, A>( - node: usize, - graph: &Graph, - partition: &'a Partition, - empty: &'a IntSet, -) -> Vec<&'a IntSet> -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ - let mut adjacent_community_ids: IntSet = IntSet::default(); - // let mut adjacent_communities: Vec<&IntSet> = vec![]; - adjacent_community_ids.insert(partition.node_partition[node]); - for u in graph.get_successor_nodes_by_index(&node) { - adjacent_community_ids.insert(partition.node_partition[u.node_index]); - } - // if node == 2 { - // println!("{:?}", partition); - // println!("{:?}", adjacent_community_ids); - // } // MALCOLM - let mut adjacent_communities: Vec<&IntSet> = adjacent_community_ids - .into_iter() - .map(|i| &partition.partition[i]) - .collect(); - adjacent_communities.push(&empty); - // println!( - // "adjacent_communities for {}: {:?}", - // node, - // adjacent_communities.len() - // ); // MALCOLM - adjacent_communities -} - -fn argmax( - v: usize, - partition: &Partition, - communities: &[&IntSet], - graph: &Graph, - weighted: bool, - quality_function: &QualityFunction, - resolution: f64, -) -> (IntSet, f64) { - let mut opt: IntSet = communities[0].iter().cloned().collect(); - let mut val = get_delta( - v, - partition, - &opt, - graph, - weighted, - &quality_function, - resolution, - ); - for k in 1..communities.len() { - let optk = &communities[k]; - let valk = get_delta( - v, - partition, - optk, - graph, - weighted, - &quality_function, - resolution, - ); - if valk > val { - opt = optk.iter().cloned().collect(); - val = valk; - } - } - (opt, val) -} - -fn get_delta( - v: usize, - partition: &Partition, - target: &IntSet, - graph: &Graph, - weighted: bool, - quality_function: &QualityFunction, - resolution: f64, -) -> f64 { - match quality_function { - QualityFunction::Modularity => { - get_delta_modularity(v, partition, target, graph, weighted, resolution) - } - QualityFunction::CPM => get_delta_cpm(v, partition, target, graph, weighted, resolution), - } -} - -fn get_delta_modularity( - v: usize, - partition: &Partition, - target: &IntSet, - graph: &Graph, - weighted: bool, - resolution: f64, -) -> f64 { - if target.contains(&v) { - return 0.0; - } - let m = graph.size(weighted); - let source_community = partition.node_community(v); - let diff_source = - single_node_neighbor_cut_size(graph, v, &source_community.without(&v), weighted); - let diff_target = single_node_neighbor_cut_size(graph, v, &target, weighted); - let deg_v = match weighted { - true => graph.get_node_weighted_degree_by_index(v), - false => graph.get_node_degree_by_index(v) as f64, - }; - let degs_source = partition.degree_sum(v); - let degs_target = match target.len() == 0 { - true => 0.0, - false => partition.degree_sum(*target.into_iter().next().unwrap()), - }; - - let delta = ((diff_target - diff_source) - - resolution / (2.0 * m) * (deg_v.powf(2.0) + deg_v * (degs_target - degs_source))) - / m; - - // MALCOLM - // println!("partition: {:?}", partition); - // println!("target: {:?}", target); - // println!("delta | v: {} target: {:?} delta: {}", v, target, delta); - delta -} - -fn get_delta_cpm( - v: usize, - partition: &Partition, - target: &IntSet, - graph: &Graph, - weighted: bool, - resolution: f64, -) -> f64 { - if target.contains(&v) { - return 0.0; - } - let m = graph.size(weighted); - let source_community = partition.node_community(v); - let diff_source = - single_node_neighbor_cut_size(graph, v, &source_community.without(&v), weighted); - let diff_target = single_node_neighbor_cut_size(graph, v, &target, weighted); - - let node_weights = graph - .get_all_nodes() - .into_iter() - .map(|n| n.attributes.unwrap()) - .collect::>(); - let v_weight = node_weights[v]; - let source_weight = source_community - .iter() - .map(|n| node_weights[*n]) - .sum::(); - let target_weight = target.iter().map(|n| node_weights[*n]).sum::(); - - // let deg_v = match weighted { - // true => graph.get_node_weighted_degree_by_index(v), - // false => graph.get_node_degree_by_index(v) as f64, - // }; - // let degs_source = partition.degree_sum(v); - // let degs_target = match target.len() == 0 { - // true => 0.0, - // false => partition.degree_sum(*target.into_iter().next().unwrap()), - // }; - - let delta = diff_target - - diff_source - - resolution * v_weight * (v_weight + target_weight - source_weight); - - // MALCOLM - // println!("partition: {:?}", partition); - // println!("target: {:?}", target); - // println!("delta | v: {} target: {:?} delta: {}", v, target, delta); - delta -} - -fn single_node_neighbor_cut_size( - graph: &Graph, - v: usize, - community: &IntSet, - weighted: bool, -) -> f64 -where - T: Hash + Eq + Clone + Ord + Display + Send + Sync, - A: Clone + Send + Sync, -{ - graph - .get_successor_nodes_by_index(&v) - .into_iter() - .filter(|x| community.contains(&x.node_index)) - .map(|x| match weighted { - true => x.weight, - false => 1.0, - }) - .sum() -} - #[cfg(test)] mod tests { use super::*; use crate::{Edge, Graph, GraphSpecs, Node}; - use assert_approx_eq::assert_approx_eq; use rand::SeedableRng; use rand_chacha::ChaCha20Rng; use std::sync::Arc; - #[test] - fn test_single_node_neighbor_cut_size_1() { - let edges: Vec>> = vec![ - Edge::new(0, 1), - Edge::new(1, 2), - Edge::new(1, 3), - Edge::new(1, 4), - ]; - let specs = GraphSpecs::directed_create_missing(); - let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); - let community = vec![1, 2, 3].into_iter().collect(); - let result = single_node_neighbor_cut_size(&graph, 0, &community, false); - assert_eq!(result, 1.0); - let result = single_node_neighbor_cut_size(&graph, 1, &community, false); - assert_eq!(result, 2.0); - let result = single_node_neighbor_cut_size(&graph, 2, &community, false); - assert_eq!(result, 0.0); - } - - #[test] - fn test_single_node_neighbor_cut_size_2() { - let edges: Vec>> = vec![ - Edge::with_weight(0, 1, 1.1), - Edge::with_weight(1, 2, 2.3), - Edge::with_weight(1, 3, 3.5), - Edge::with_weight(1, 4, 4.7), - ]; - let specs = GraphSpecs::directed_create_missing(); - let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); - let community = vec![1, 2, 3].into_iter().collect(); - let result = single_node_neighbor_cut_size(&graph, 0, &community, true); - assert_eq!(result, 1.1); - let result = single_node_neighbor_cut_size(&graph, 1, &community, true); - assert_eq!(result, 5.8); - let result = single_node_neighbor_cut_size(&graph, 2, &community, true); - assert_eq!(result, 0.0); - } - - #[test] - fn test_get_delta_1() { - let edges: Vec>> = vec![ - Edge::with_weight(0, 1, 1.1), - Edge::with_weight(1, 2, 2.3), - Edge::with_weight(1, 3, 3.5), - Edge::with_weight(1, 4, 4.7), - ]; - let specs = GraphSpecs::directed_create_missing(); - let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); - let partition = Partition { - partition: vec![ - vec![0, 1].into_iter().collect(), - vec![2, 3, 4].into_iter().collect(), - ], - node_partition: vec![0, 0, 1, 1, 1], - degree_sums: vec![12.0, 24.0], - }; - let target = vec![2, 3, 4].into_iter().collect(); - let result = get_delta( - 1, - &partition, - &target, - &graph, - true, - &QualityFunction::Modularity, - 1.0, - ); - assert_approx_eq!(result, -0.11206896551724145); - } - - #[test] - fn test_get_delta_2() { - let edges: Vec>> = vec![ - Edge::with_weight(0, 1, 1.1), - Edge::with_weight(1, 2, 2.3), - Edge::with_weight(1, 3, 3.5), - Edge::with_weight(1, 4, 4.7), - ]; - let specs = GraphSpecs::undirected_create_missing(); - let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); - let partition = Partition { - partition: vec![ - vec![0, 1].into_iter().collect(), - vec![2, 3, 4].into_iter().collect(), - ], - node_partition: vec![0, 0, 1, 1, 1], - degree_sums: vec![12.0, 24.0], - }; - let target = vec![2, 3, 4].into_iter().collect(); - let result = get_delta( - 1, - &partition, - &target, - &graph, - true, - &QualityFunction::Modularity, - 1.0, - ); - assert_approx_eq!(result, -0.20689655172413812); - } - - #[test] - fn test_get_adjacent_communities() { - let nodes = vec![ - Node::from_name(0), - Node::from_name(1), - Node::from_name(2), - Node::from_name(3), - Node::from_name(4), - ]; - let edges: Vec>> = vec![ - Edge::new(0, 2), - Edge::new(1, 2), - Edge::new(2, 3), - Edge::new(2, 4), - ]; - let specs = GraphSpecs::directed_create_missing(); - let graph = Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap(); - let partition = Partition { - partition: vec![ - vec![0, 1].into_iter().collect(), - vec![2].into_iter().collect(), - vec![3].into_iter().collect(), - vec![4].into_iter().collect(), - ], - node_partition: vec![0, 0, 1, 2, 3], - degree_sums: vec![0.0, 0.0, 0.0, 0.0], - }; - let empty = IntSet::default(); - let result = get_adjacent_communities(0, &graph, &partition, &empty); - assert_eq!(result.len(), 3); - assert!(result == vec![&partition.partition[0], &partition.partition[1], &empty]); - let result = get_adjacent_communities(1, &graph, &partition, &empty); - assert!(result == vec![&partition.partition[0], &partition.partition[1], &empty]); - let result = get_adjacent_communities(2, &graph, &partition, &empty); - assert!( - result - == vec![ - &partition.partition[1], - &partition.partition[2], - &partition.partition[3], - &empty - ] - ); - } - - #[test] - fn test_argmax_1() { - let graph = get_graph_for_argmax(true); - let partition = get_partition_for_argmax(); - let empty = IntSet::default(); - let communities = get_communities_for_argmax(&partition, &graph, &empty); - let result = argmax( - 0, - &partition, - &communities, - &graph, - true, - &QualityFunction::Modularity, - 1.0, - ); - assert_eq!(result.0.len(), 1); - assert!(result.0.contains(&2)); - assert_approx_eq!(result.1, 0.09033145065398336); - let result = argmax( - 0, - &partition, - &communities, - &graph, - false, - &QualityFunction::Modularity, - 1.0, - ); - assert_eq!(result.0.len(), 1); - assert!(result.0.contains(&2)); - assert_approx_eq!(result.1, 0.21875); - } - - #[test] - fn test_argmax_2() { - let graph = get_graph_for_argmax(false); - let partition = get_partition_for_argmax(); - let empty = IntSet::default(); - let communities = get_communities_for_argmax(&partition, &graph, &empty); - let result = argmax( - 0, - &partition, - &communities, - &graph, - true, - &QualityFunction::Modularity, - 1.0, - ); - assert_eq!(result.0.len(), 1); - assert!(result.0.contains(&2)); - assert_approx_eq!(result.1, 0.09033145065398336); - let result = argmax( - 0, - &partition, - &communities, - &graph, - false, - &QualityFunction::Modularity, - 1.0, - ); - assert_eq!(result.0.len(), 1); - assert!(result.0.contains(&2)); - assert_approx_eq!(result.1, 0.21875); - } - #[test] fn test_move_node() { let graph = get_graph_for_argmax(true); @@ -828,12 +423,4 @@ mod tests { degree_sums: vec![0.0, 0.0, 0.0, 0.0], } } - - fn get_communities_for_argmax<'a>( - partition: &'a Partition, - graph: &Graph, - empty: &'a IntSet, - ) -> Vec<&'a IntSet> { - get_adjacent_communities(0, &graph, &partition, empty) - } } diff --git a/src/algorithms/community/leiden/partition.rs b/src/algorithms/community/leiden/partition.rs index 5c2e97b..577cbe5 100644 --- a/src/algorithms/community/leiden/partition.rs +++ b/src/algorithms/community/leiden/partition.rs @@ -38,9 +38,6 @@ impl Partition { self.degree_sums.push(0.0); } - // println!("source_partition_idx: {:?}", source_partition_idx); - // println!("target_partition_idx: {:?}", target_partition_idx); - // Remove `v` from its old community and place it into the target partition self.partition[source_partition_idx].remove(&v); self.partition[target_partition_idx].insert(v); @@ -75,7 +72,6 @@ impl Partition { } pub fn from_partition(graph: &Graph, partition: Vec>) -> Partition { - // println!("degrees: {:?}", graph.get_weighted_degree_for_all_nodes()); let node_partition: Vec = partition .iter() .enumerate() @@ -98,6 +94,25 @@ impl Partition { } } + pub fn get_adjacent_communities<'a>( + &'a self, + node: usize, + graph: &Graph, + empty: &'a IntSet, + ) -> Vec<&'a IntSet> { + let mut adjacent_community_ids: IntSet = IntSet::default(); + adjacent_community_ids.insert(self.node_partition[node]); + for u in graph.get_successor_nodes_by_index(&node) { + adjacent_community_ids.insert(self.node_partition[u.node_index]); + } + let mut adjacent_communities: Vec<&IntSet> = adjacent_community_ids + .into_iter() + .map(|i| &self.partition[i]) + .collect(); + adjacent_communities.push(&empty); + adjacent_communities + } + pub fn flatten(self, aggregate_graph: &AggregateGraph) -> Self { if aggregate_graph.parent_graph.is_none() { return self; @@ -164,4 +179,49 @@ mod tests { }; Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap() } + + #[test] + fn test_get_adjacent_communities() { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + ]; + let edges: Vec>> = vec![ + Edge::new(0, 2), + Edge::new(1, 2), + Edge::new(2, 3), + Edge::new(2, 4), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap(); + let partition = Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 2, 3], + degree_sums: vec![0.0, 0.0, 0.0, 0.0], + }; + let empty = IntSet::default(); + let result = partition.get_adjacent_communities(0, &graph, &empty); + assert_eq!(result.len(), 3); + assert!(result == vec![&partition.partition[0], &partition.partition[1], &empty]); + let result = partition.get_adjacent_communities(1, &graph, &empty); + assert!(result == vec![&partition.partition[0], &partition.partition[1], &empty]); + let result = partition.get_adjacent_communities(2, &graph, &empty); + assert!( + result + == vec![ + &partition.partition[1], + &partition.partition[2], + &partition.partition[3], + &empty + ] + ); + } } diff --git a/src/algorithms/community/leiden/quality.rs b/src/algorithms/community/leiden/quality.rs new file mode 100644 index 0000000..5a720e1 --- /dev/null +++ b/src/algorithms/community/leiden/quality.rs @@ -0,0 +1,366 @@ +use crate::{ext::hashset::IntSetExt, Graph}; +use core::f64; +use nohash::IntSet; + +use super::partition::Partition; + +pub enum QualityFunction { + Modularity, + CPM, +} + +pub fn argmax( + v: usize, + partition: &Partition, + communities: &[&IntSet], + graph: &Graph, + weighted: bool, + quality_function: &QualityFunction, + resolution: f64, +) -> (IntSet, f64) { + let mut opt: IntSet = communities[0].iter().cloned().collect(); + let mut val = get_delta( + v, + partition, + &opt, + graph, + weighted, + &quality_function, + resolution, + ); + for k in 1..communities.len() { + let optk = &communities[k]; + let valk = get_delta( + v, + partition, + optk, + graph, + weighted, + &quality_function, + resolution, + ); + if valk > val { + opt = optk.iter().cloned().collect(); + val = valk; + } + } + (opt, val) +} + +pub fn get_delta( + v: usize, + partition: &Partition, + target: &IntSet, + graph: &Graph, + weighted: bool, + quality_function: &QualityFunction, + resolution: f64, +) -> f64 { + match quality_function { + QualityFunction::Modularity => { + get_delta_modularity(v, partition, target, graph, weighted, resolution) + } + QualityFunction::CPM => get_delta_cpm(v, partition, target, graph, weighted, resolution), + } +} + +fn get_delta_modularity( + v: usize, + partition: &Partition, + target: &IntSet, + graph: &Graph, + weighted: bool, + resolution: f64, +) -> f64 { + if target.contains(&v) { + return 0.0; + } + let m = graph.size(weighted); + let source_community = partition.node_community(v); + let diff_source = + single_node_neighbor_cut_size(graph, v, &source_community.without(&v), weighted); + let diff_target = single_node_neighbor_cut_size(graph, v, &target, weighted); + let deg_v = match weighted { + true => graph.get_node_weighted_degree_by_index(v), + false => graph.get_node_degree_by_index(v) as f64, + }; + let degs_source = partition.degree_sum(v); + let degs_target = match target.len() == 0 { + true => 0.0, + false => partition.degree_sum(*target.into_iter().next().unwrap()), + }; + + let delta = ((diff_target - diff_source) + - resolution / (2.0 * m) * (deg_v.powf(2.0) + deg_v * (degs_target - degs_source))) + / m; + + delta +} + +fn get_delta_cpm( + v: usize, + partition: &Partition, + target: &IntSet, + graph: &Graph, + weighted: bool, + resolution: f64, +) -> f64 { + if target.contains(&v) { + return 0.0; + } + let source_community = partition.node_community(v); + let diff_source = + single_node_neighbor_cut_size(graph, v, &source_community.without(&v), weighted); + let diff_target = single_node_neighbor_cut_size(graph, v, &target, weighted); + + let node_weights = graph + .get_all_nodes() + .into_iter() + .map(|n| n.attributes.unwrap()) + .collect::>(); + let v_weight = node_weights[v]; + let source_weight = source_community + .iter() + .map(|n| node_weights[*n]) + .sum::(); + let target_weight = target.iter().map(|n| node_weights[*n]).sum::(); + + let delta = diff_target + - diff_source + - resolution * v_weight * (v_weight + target_weight - source_weight); + + delta +} + +fn single_node_neighbor_cut_size( + graph: &Graph, + v: usize, + community: &IntSet, + weighted: bool, +) -> f64 { + graph + .get_successor_nodes_by_index(&v) + .into_iter() + .filter(|x| community.contains(&x.node_index)) + .map(|x| match weighted { + true => x.weight, + false => 1.0, + }) + .sum() +} + +#[cfg(test)] +mod tests { + + use super::*; + use crate::{Edge, Graph, GraphSpecs, Node}; + use assert_approx_eq::assert_approx_eq; + use std::sync::Arc; + + #[test] + fn test_single_node_neighbor_cut_size_1() { + let edges: Vec>> = vec![ + Edge::new(0, 1), + Edge::new(1, 2), + Edge::new(1, 3), + Edge::new(1, 4), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let community = vec![1, 2, 3].into_iter().collect(); + let result = single_node_neighbor_cut_size(&graph, 0, &community, false); + assert_eq!(result, 1.0); + let result = single_node_neighbor_cut_size(&graph, 1, &community, false); + assert_eq!(result, 2.0); + let result = single_node_neighbor_cut_size(&graph, 2, &community, false); + assert_eq!(result, 0.0); + } + + #[test] + fn test_single_node_neighbor_cut_size_2() { + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(1, 3, 3.5), + Edge::with_weight(1, 4, 4.7), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let community = vec![1, 2, 3].into_iter().collect(); + let result = single_node_neighbor_cut_size(&graph, 0, &community, true); + assert_eq!(result, 1.1); + let result = single_node_neighbor_cut_size(&graph, 1, &community, true); + assert_eq!(result, 5.8); + let result = single_node_neighbor_cut_size(&graph, 2, &community, true); + assert_eq!(result, 0.0); + } + + #[test] + fn test_get_delta_1() { + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(1, 3, 3.5), + Edge::with_weight(1, 4, 4.7), + ]; + let specs = GraphSpecs::directed_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let partition = Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 1, 1], + degree_sums: vec![12.0, 24.0], + }; + let target = vec![2, 3, 4].into_iter().collect(); + let result = get_delta( + 1, + &partition, + &target, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); + assert_approx_eq!(result, -0.11206896551724145); + } + + #[test] + fn test_get_delta_2() { + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(1, 3, 3.5), + Edge::with_weight(1, 4, 4.7), + ]; + let specs = GraphSpecs::undirected_create_missing(); + let graph = Graph::new_from_nodes_and_edges(vec![], edges, specs).unwrap(); + let partition = Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2, 3, 4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 1, 1], + degree_sums: vec![12.0, 24.0], + }; + let target = vec![2, 3, 4].into_iter().collect(); + let result = get_delta( + 1, + &partition, + &target, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); + assert_approx_eq!(result, -0.20689655172413812); + } + + #[test] + fn test_argmax_1() { + let graph = get_graph_for_argmax(true); + let partition = get_partition_for_argmax(); + let empty = IntSet::default(); + let communities = get_communities_for_argmax(&partition, &graph, &empty); + let result = argmax( + 0, + &partition, + &communities, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.09033145065398336); + let result = argmax( + 0, + &partition, + &communities, + &graph, + false, + &QualityFunction::Modularity, + 1.0, + ); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.21875); + } + + #[test] + fn test_argmax_2() { + let graph = get_graph_for_argmax(false); + let partition = get_partition_for_argmax(); + let empty = IntSet::default(); + let communities = get_communities_for_argmax(&partition, &graph, &empty); + let result = argmax( + 0, + &partition, + &communities, + &graph, + true, + &QualityFunction::Modularity, + 1.0, + ); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.09033145065398336); + let result = argmax( + 0, + &partition, + &communities, + &graph, + false, + &QualityFunction::Modularity, + 1.0, + ); + assert_eq!(result.0.len(), 1); + assert!(result.0.contains(&2)); + assert_approx_eq!(result.1, 0.21875); + } + + fn get_graph_for_argmax(directed: bool) -> Graph { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + ]; + let edges: Vec>> = vec![ + Edge::with_weight(0, 2, 1.1), + Edge::with_weight(1, 2, 2.3), + Edge::with_weight(2, 3, 3.5), + Edge::with_weight(2, 4, 4.7), + ]; + let specs = if directed { + GraphSpecs::directed_create_missing() + } else { + GraphSpecs::undirected_create_missing() + }; + Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap() + } + + fn get_partition_for_argmax() -> Partition { + Partition { + partition: vec![ + vec![0, 1].into_iter().collect(), + vec![2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + node_partition: vec![0, 0, 1, 2, 3], + degree_sums: vec![0.0, 0.0, 0.0, 0.0], + } + } + + fn get_communities_for_argmax<'a>( + partition: &'a Partition, + graph: &Graph, + empty: &'a IntSet, + ) -> Vec<&'a IntSet> { + partition.get_adjacent_communities(0, &graph, empty) + } +} From 963605296fa412e64073b9d2a3cfdccaf4797a48 Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Thu, 9 Jan 2025 14:03:49 -0500 Subject: [PATCH 7/9] wip --- src/algorithms/community/leiden/mod.rs | 42 +++++++++++----------- src/algorithms/community/leiden/quality.rs | 7 ++++ src/algorithms/community/utility.rs | 2 +- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/algorithms/community/leiden/mod.rs b/src/algorithms/community/leiden/mod.rs index f8665ca..df5b155 100644 --- a/src/algorithms/community/leiden/mod.rs +++ b/src/algorithms/community/leiden/mod.rs @@ -3,10 +3,10 @@ use crate::{ algorithms::cuts::cut_size_by_indexes, ext::hashset::IntSetExt, Error, ErrorKind, Graph, }; use core::f64; -use itertools::Itertools; use nohash::IntSet; +use rand::distributions::Distribution; use rand::distributions::WeightedIndex; -use rand::{distributions::Distribution, RngCore}; +use rand::prelude::StdRng; use std::collections::{HashSet, VecDeque}; use std::fmt::Debug; use std::fmt::Display; @@ -40,7 +40,12 @@ guaranteeing well-connected communities" by V.A. Traag, L. Waltman and N.J. van # Examples - +``` +use graphrs::{algorithms::community::leiden::{leiden, QualityFunction}, generators}; +let graph = generators::social::karate_club_graph(); +let communities = leiden(&graph, true, QualityFunction::CPM, None, None, None); +assert_eq!(communities.unwrap().len(), 4); +``` */ pub fn leiden( graph: &Graph, @@ -160,7 +165,7 @@ fn refine_partition( gamma: f64, ) -> Partition { let mut refined_partition = get_singleton_partition(&aggregate_graph.graph, true); - let mut rng: Box = Box::new(rand::thread_rng()); + let mut rng: StdRng = utility::get_rng(None); for community in partition.partition.iter() { merge_nodes_subset( &mut refined_partition, @@ -184,7 +189,7 @@ fn merge_nodes_subset( resolution: f64, theta: f64, gamma: f64, - rng: &mut Box, + rng: &mut StdRng, ) { let size_s = aggregate_graph.node_total(community); let communities_of_size: IntSet = community @@ -198,7 +203,7 @@ fn merge_nodes_subset( x >= gamma * v_node_total * (size_s - v_node_total) }) .collect(); - for v in communities_of_size.into_iter().sorted() { + for v in communities_of_size { if partition.node_community(v).len() != 1 { continue; } @@ -276,8 +281,6 @@ mod tests { use super::*; use crate::{Edge, Graph, GraphSpecs, Node}; - use rand::SeedableRng; - use rand_chacha::ChaCha20Rng; use std::sync::Arc; #[test] @@ -303,7 +306,7 @@ mod tests { #[test] fn test_merge_nodes_subset_1() { let (mut partition, community, aggregate_graph) = get_params_for_merge_nodes_subset(); - let mut rng: Box = Box::new(ChaCha20Rng::seed_from_u64(1)); + let mut rng: StdRng = utility::get_rng(Some(1)); merge_nodes_subset( &mut partition, &community, @@ -314,22 +317,21 @@ mod tests { 0.05, &mut rng, ); - assert_eq!(partition.node_partition, vec![1, 0, 1, 2, 2, 2]); + assert_eq!(partition.node_partition, vec![0, 0, 0, 1, 1, 1]); assert_eq!( partition.partition, vec![ - vec![1].into_iter().collect(), - vec![0, 2].into_iter().collect(), + vec![0, 1, 2].into_iter().collect(), vec![3, 4, 5].into_iter().collect(), ] ); - assert_eq!(partition.degree_sums, vec![3.3, 12.3, 20.5]); + assert_eq!(partition.degree_sums, vec![15.600000000000001, 18.9]); } #[test] fn test_merge_nodes_subset_2() { let (mut partition, community, aggregate_graph) = get_params_for_merge_nodes_subset(); - let mut rng: Box = Box::new(ChaCha20Rng::seed_from_u64(4)); + let mut rng: StdRng = utility::get_rng(Some(3)); merge_nodes_subset( &mut partition, &community, @@ -340,16 +342,16 @@ mod tests { 0.05, &mut rng, ); - assert_eq!(partition.node_partition, vec![0, 0, 0, 1, 2, 2]); + assert_eq!(partition.node_partition, vec![0, 1, 0, 2, 2, 2]); assert_eq!( partition.partition, vec![ - vec![0, 1, 2].into_iter().collect(), - vec![3].into_iter().collect(), - vec![4, 5].into_iter().collect(), + vec![0, 2].into_iter().collect(), + vec![1].into_iter().collect(), + vec![3, 4, 5].into_iter().collect(), ] ); - assert_eq!(partition.degree_sums, vec![15.600000000000001, 6.2, 12.6]); + assert_eq!(partition.degree_sums, vec![10.8, 3.3, 18.9]); } fn get_params_for_merge_nodes_subset<'a>() -> (Partition, IntSet, AggregateGraph) { @@ -365,7 +367,7 @@ mod tests { Edge::with_weight(0, 1, 1.1), Edge::with_weight(1, 2, 2.2), Edge::with_weight(0, 2, 3.7), - Edge::with_weight(2, 3, 1.7), + Edge::with_weight(2, 3, 0.1), Edge::with_weight(3, 4, 2.1), Edge::with_weight(4, 5, 3.2), Edge::with_weight(3, 5, 4.1), diff --git a/src/algorithms/community/leiden/quality.rs b/src/algorithms/community/leiden/quality.rs index 5a720e1..4f44722 100644 --- a/src/algorithms/community/leiden/quality.rs +++ b/src/algorithms/community/leiden/quality.rs @@ -4,6 +4,13 @@ use nohash::IntSet; use super::partition::Partition; +/** +The quality function to use for the Leiden algorithm. +[Modularity]() is a traditional +method of assessing how well a set of communities partition a graph. +[Constant Potts Model]() +is similar to modularity. +*/ pub enum QualityFunction { Modularity, CPM, diff --git a/src/algorithms/community/utility.rs b/src/algorithms/community/utility.rs index 5d34d38..4a9d5f7 100644 --- a/src/algorithms/community/utility.rs +++ b/src/algorithms/community/utility.rs @@ -18,7 +18,7 @@ where } /// Returns a random number generator (RNG), optionally seeded. -fn get_rng(seed: Option) -> StdRng { +pub fn get_rng(seed: Option) -> StdRng { match seed { None => { let mut trng = thread_rng(); From 7a443b8d66a352a3fb32edf38a03c31e7b54766d Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Thu, 9 Jan 2025 16:13:51 -0500 Subject: [PATCH 8/9] finished (unoptimized) --- README.md | 2 + src/algorithms/community/leiden/mod.rs | 193 ++++++++++++++----- src/algorithms/community/leiden/partition.rs | 19 ++ src/lib.rs | 2 + 4 files changed, 164 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 0372e11..38d74b5 100644 --- a/README.md +++ b/README.md @@ -139,8 +139,10 @@ let centralities = centrality::eigenvector::eigenvector_centrality(&graph, false ```rust use graphrs::{algorithms::{community}, generators}; +use graphrs::{algorithms::community::leiden::{leiden, QualityFunction}}; let graph = generators::social::karate_club_graph(); let partitions = community::louvain::louvain_partitions(&graph, false, None, None, Some(1)); +let partitions = leiden(&graph, true, QualityFunction::CPM, None, None, None); ``` ### Read and write graphml files diff --git a/src/algorithms/community/leiden/mod.rs b/src/algorithms/community/leiden/mod.rs index df5b155..efbb00a 100644 --- a/src/algorithms/community/leiden/mod.rs +++ b/src/algorithms/community/leiden/mod.rs @@ -73,6 +73,7 @@ where let mut aggregate_graph = AggregateGraph::initial(graph, weighted); let mut partition = get_singleton_partition(graph, weighted); let mut prev_partition: Option = None; + let mut rng: StdRng = utility::get_rng(None); loop { partition = move_nodes_fast( &aggregate_graph.graph, @@ -81,13 +82,7 @@ where &quality_function, _resolution, ); - if partitions::partition_is_singleton(&partition.partition, graph.number_of_nodes()) - || (prev_partition.is_some() - && partitions::partitions_eq( - &partition.partition, - &prev_partition.unwrap().partition, - )) - { + if is_done(&aggregate_graph.graph, &partition, &prev_partition) { let flattened = partition.flatten(&aggregate_graph); return Ok(partitions::convert_usize_partitions_to_t( flattened.partition, @@ -102,27 +97,18 @@ where _resolution, _theta, _gamma, + &mut rng, ); aggregate_graph = aggregate_graph.from_partition(&refined_partition); - let partitions: Vec> = partition - .partition - .iter() - .map(|c| { - aggregate_graph - .node_nodes - .as_ref() - .unwrap() - .iter() - .enumerate() - .filter(|(_i, nodes)| nodes.is_subset(c)) - .map(|(i, _nodes)| i) - .collect() - }) - .collect(); + let partitions = partition.get_lifted_partitions(&aggregate_graph); partition = Partition::from_partition(&aggregate_graph.graph, partitions); } } +/** +Perform fast local node moves to communities to improve the partition's quality. +For every node, greedily move it to a neighboring community, maximizing the improvement in the partition's quality. +*/ fn move_nodes_fast( graph: &Graph, partition: &mut Partition, @@ -156,6 +142,29 @@ fn move_nodes_fast( partition.clone() } +/** +Determines if the Leiden algorithm is done. The definition of done is when the current +partition is a singleton or when the current and previous partitions are equal +(no change was made). +*/ +fn is_done( + graph: &Graph, + partition: &Partition, + prev_partition: &Option, +) -> bool { + let partition_is_singleton = + partitions::partition_is_singleton(&partition.partition, graph.number_of_nodes()); + let partitions_eq = prev_partition.is_some() + && partitions::partitions_eq( + &partition.partition, + &prev_partition.as_ref().unwrap().partition, + ); + partition_is_singleton || partitions_eq +} + +/** +Refine all communities by merging repeatedly, starting from a singleton partition. +*/ fn refine_partition( aggregate_graph: &AggregateGraph, partition: &Partition, @@ -163,9 +172,9 @@ fn refine_partition( resolution: f64, theta: f64, gamma: f64, + rng: &mut StdRng, ) -> Partition { let mut refined_partition = get_singleton_partition(&aggregate_graph.graph, true); - let mut rng: StdRng = utility::get_rng(None); for community in partition.partition.iter() { merge_nodes_subset( &mut refined_partition, @@ -175,12 +184,15 @@ fn refine_partition( resolution, theta, gamma, - &mut rng, + rng, ); } refined_partition } +/** +Merge the nodes in the subset `community` into one or more sets to refine the partition. +*/ fn merge_nodes_subset( partition: &mut Partition, community: &IntSet, @@ -253,6 +265,9 @@ fn merge_nodes_subset( } } +/** +Gets a partition of `Graph` where each partition contains one node. +*/ fn get_singleton_partition(graph: &Graph, weighted: bool) -> Partition where T: Hash + Eq + Clone + Ord + Display + Send + Sync, @@ -281,12 +296,38 @@ mod tests { use super::*; use crate::{Edge, Graph, GraphSpecs, Node}; + use assert_approx_eq::assert_approx_eq; use std::sync::Arc; + #[test] + fn test_is_done() { + let graph = get_graph_1(); + + let partition = get_partition_1(); + let prev_partition = get_partition_1(); + assert!(is_done(&graph, &partition, &Some(prev_partition.clone()))); + + let prev_partition = get_partition_2(); + assert!(!is_done(&graph, &partition, &Some(prev_partition.clone()))); + + let partition = Partition { + partition: vec![ + vec![0].into_iter().collect(), + vec![1].into_iter().collect(), + vec![2].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + node_partition: vec![0, 1, 2, 3, 4], + degree_sums: vec![4.8, 3.3, 7.5, 6.2, 5.3], + }; + assert!(is_done(&graph, &partition, &Some(prev_partition))); + } + #[test] fn test_move_node() { - let graph = get_graph_for_argmax(true); - let mut partition = get_partition_for_argmax(); + let graph = get_graph_1(); + let mut partition = get_partition_1(); let mut target = IntSet::default(); target.insert(2); partition.move_node(0, &target, &graph, true); @@ -354,26 +395,44 @@ mod tests { assert_eq!(partition.degree_sums, vec![10.8, 3.3, 18.9]); } + #[test] + fn test_refine_partition() { + let graph = get_graph_2(); + let aggregate_graph = AggregateGraph::initial(&graph, true); + let partition = Partition { + partition: vec![ + vec![0, 1, 2].into_iter().collect(), + vec![3, 4, 5].into_iter().collect(), + ], + node_partition: vec![0, 0, 0, 1, 1, 1], + degree_sums: vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0], + }; + let mut rng = utility::get_rng(Some(1)); + let refined_partition = refine_partition( + &aggregate_graph, + &partition, + &QualityFunction::Modularity, + 0.25, + 0.3, + 0.05, + &mut rng, + ); + assert_eq!(refined_partition.node_partition, vec![0, 0, 0, 1, 2, 1]); + assert_eq!( + refined_partition.partition, + vec![ + vec![0, 1, 2].into_iter().collect(), + vec![3, 5].into_iter().collect(), + vec![4].into_iter().collect(), + ] + ); + assert_approx_eq!(refined_partition.degree_sums[0], 14.1); + assert_approx_eq!(refined_partition.degree_sums[1], 13.6); + assert_approx_eq!(refined_partition.degree_sums[2], 5.3); + } + fn get_params_for_merge_nodes_subset<'a>() -> (Partition, IntSet, AggregateGraph) { - let nodes = vec![ - Node::from_name(0), - Node::from_name(1), - Node::from_name(2), - Node::from_name(3), - Node::from_name(4), - Node::from_name(5), - ]; - let edges: Vec>> = vec![ - Edge::with_weight(0, 1, 1.1), - Edge::with_weight(1, 2, 2.2), - Edge::with_weight(0, 2, 3.7), - Edge::with_weight(2, 3, 0.1), - Edge::with_weight(3, 4, 2.1), - Edge::with_weight(4, 5, 3.2), - Edge::with_weight(3, 5, 4.1), - ]; - let graph = - Graph::new_from_nodes_and_edges(nodes, edges, GraphSpecs::undirected()).unwrap(); + let graph = get_graph_2(); let partition = Partition { partition: vec![ vec![0].into_iter().collect(), @@ -391,7 +450,7 @@ mod tests { (partition, community, aggregate_graph) } - fn get_graph_for_argmax(directed: bool) -> Graph { + fn get_graph_1() -> Graph { let nodes = vec![ Node::from_name(0), Node::from_name(1), @@ -405,15 +464,32 @@ mod tests { Edge::with_weight(2, 3, 3.5), Edge::with_weight(2, 4, 4.7), ]; - let specs = if directed { - GraphSpecs::directed_create_missing() - } else { - GraphSpecs::undirected_create_missing() - }; + let specs = GraphSpecs::undirected_create_missing(); Graph::new_from_nodes_and_edges(nodes, edges, specs).unwrap() } - fn get_partition_for_argmax() -> Partition { + fn get_graph_2() -> Graph { + let nodes = vec![ + Node::from_name(0), + Node::from_name(1), + Node::from_name(2), + Node::from_name(3), + Node::from_name(4), + Node::from_name(5), + ]; + let edges: Vec>> = vec![ + Edge::with_weight(0, 1, 1.1), + Edge::with_weight(1, 2, 2.2), + Edge::with_weight(0, 2, 3.7), + Edge::with_weight(2, 3, 0.1), + Edge::with_weight(3, 4, 2.1), + Edge::with_weight(4, 5, 3.2), + Edge::with_weight(3, 5, 4.1), + ]; + Graph::new_from_nodes_and_edges(nodes, edges, GraphSpecs::undirected()).unwrap() + } + + fn get_partition_1() -> Partition { Partition { partition: vec![ vec![0, 1].into_iter().collect(), @@ -425,4 +501,17 @@ mod tests { degree_sums: vec![0.0, 0.0, 0.0, 0.0], } } + + fn get_partition_2() -> Partition { + Partition { + partition: vec![ + vec![0, 2].into_iter().collect(), + vec![1].into_iter().collect(), + vec![3].into_iter().collect(), + vec![4].into_iter().collect(), + ], + node_partition: vec![0, 1, 0, 2, 3], + degree_sums: vec![0.0, 0.0, 0.0, 0.0], + } + } } diff --git a/src/algorithms/community/leiden/partition.rs b/src/algorithms/community/leiden/partition.rs index 577cbe5..b5aabf8 100644 --- a/src/algorithms/community/leiden/partition.rs +++ b/src/algorithms/community/leiden/partition.rs @@ -125,6 +125,25 @@ impl Partition { .collect(); Partition::from_partition(graph, partitions) } + + pub fn get_lifted_partitions(&self, aggregate_graph: &AggregateGraph) -> Vec> { + let partitions: Vec> = self + .partition + .iter() + .map(|c| { + aggregate_graph + .node_nodes + .as_ref() + .unwrap() + .iter() + .enumerate() + .filter(|(_i, nodes)| nodes.is_subset(c)) + .map(|(i, _nodes)| i) + .collect() + }) + .collect(); + partitions + } } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 715dfd3..50bd184 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -140,8 +140,10 @@ let centralities = centrality::eigenvector::eigenvector_centrality(&graph, false ```rust use graphrs::{algorithms::{community}, generators}; +use graphrs::{algorithms::community::leiden::{leiden, QualityFunction}}; let graph = generators::social::karate_club_graph(); let partitions = community::louvain::louvain_partitions(&graph, false, None, None, Some(1)); +let partitions = leiden(&graph, true, QualityFunction::CPM, None, None, None); ``` ### Read and write graphml files From 429dcea387ddef3853a6570a6760cc98887edc4e Mon Sep 17 00:00:00 2001 From: Malcolm van Raalte Date: Thu, 9 Jan 2025 16:24:07 -0500 Subject: [PATCH 9/9] add new modules to readme.md and lib.rs --- README.md | 2 ++ src/lib.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/README.md b/README.md index 38d74b5..4968a84 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,8 @@ Python bindings are available in the [graphrs-python](