diff --git a/python/python/raphtory/algorithms/__init__.pyi b/python/python/raphtory/algorithms/__init__.pyi index 0920041416..3a98e39851 100644 --- a/python/python/raphtory/algorithms/__init__.pyi +++ b/python/python/raphtory/algorithms/__init__.pyi @@ -289,6 +289,7 @@ def pagerank( max_diff: Optional[float] = None, use_l2_norm: bool = True, damping_factor: float = 0.85, + weight: Optional[str] = None, ) -> NodeStateF64: """ Pagerank -- pagerank centrality value of the nodes in a graph @@ -305,6 +306,7 @@ def pagerank( is less than the max diff value given. use_l2_norm (bool): Flag for choosing the norm to use for convergence checks, True for l2 norm, False for l1 norm. Defaults to True. damping_factor (float): The damping factor for the PageRank calculation. Defaults to 0.85. + weight (Optional[str]): Edge property key to use as weight. If None, all edges have weight 1.0. Returns: NodeStateF64: Mapping of nodes to their pagerank value. diff --git a/python/tests/test_base_install/test_graphdb/test_algorithms.py b/python/tests/test_base_install/test_graphdb/test_algorithms.py index 44d9524eac..b20b8ca5fd 100644 --- a/python/tests/test_base_install/test_graphdb/test_algorithms.py +++ b/python/tests/test_base_install/test_graphdb/test_algorithms.py @@ -312,6 +312,36 @@ def test_page_rank(): assert actual == expected +def test_weighted_page_rank(): + g = Graph() + g.add_edge(0, 1, 2, {"weight": 0.37}) + g.add_edge(0, 1, 3, {"weight": 4.2}) + g.add_edge(0, 2, 1, {"weight": 0.9}) + g.add_edge(0, 2, 4, {"weight": 1.7}) + g.add_edge(0, 3, 1, {"weight": 2.6}) + g.add_edge(0, 3, 2, {"weight": 0.05}) + g.add_edge(0, 4, 3, {"weight": 3.3}) + g.add_edge(0, 4, 1, {"weight": 0.8}) + + actual = algorithms.pagerank(g, iter_count=1000, max_diff=1e-10, weight="weight") + for node, expected in [("1", 0.42499), ("2", 0.07353), ("3", 0.42311), ("4", 0.07837)]: + assert abs(actual[node] - expected) < 1e-5, f"node {node}: {actual[node]} != {expected}" + + +def test_weighted_page_rank_none_matches_unweighted(): + g = Graph() + g.add_edge(0, 1, 2, {"weight": 1.0}) + g.add_edge(0, 1, 4, {"weight": 1.0}) + g.add_edge(0, 2, 3, {"weight": 1.0}) + g.add_edge(0, 3, 1, {"weight": 1.0}) + g.add_edge(0, 4, 1, {"weight": 1.0}) + + unweighted = algorithms.pagerank(g, iter_count=1000) + weighted = algorithms.pagerank(g, iter_count=1000, weight="weight") + for node in ["1", "2", "3", "4"]: + assert abs(unweighted[node] - weighted[node]) < 1e-5, f"node {node} differs" + + def test_temporal_reachability(): g = gen_graph() diff --git a/raphtory-benchmark/benches/algobench.rs b/raphtory-benchmark/benches/algobench.rs index 3465253c76..9c61545e77 100644 --- a/raphtory-benchmark/benches/algobench.rs +++ b/raphtory-benchmark/benches/algobench.rs @@ -1,7 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, SamplingMode}; use raphtory::{ algorithms::{ - centrality::pagerank::unweighted_page_rank, + centrality::pagerank::page_rank, components::weakly_connected_components, metrics::clustering_coefficient::{ global_clustering_coefficient::global_clustering_coefficient, @@ -87,7 +87,7 @@ pub fn graphgen_large_pagerank(c: &mut Criterion) { &graph, |b, graph| { b.iter(|| { - let result = unweighted_page_rank(graph, Some(100), None, None, true, None); + let result = page_rank(graph, None, Some(100), None, None, true, None); black_box(result); }); }, diff --git a/raphtory-graphql/schema.graphql b/raphtory-graphql/schema.graphql index 676caf622f..ec6e27dd4a 100644 --- a/raphtory-graphql/schema.graphql +++ b/raphtory-graphql/schema.graphql @@ -1184,7 +1184,7 @@ type Graph { } type GraphAlgorithmPlugin { - pagerank(iterCount: Int!, threads: Int, tol: Float): [PagerankOutput!]! + pagerank(iterCount: Int!, threads: Int, tol: Float, weight: String): [PagerankOutput!]! shortest_path(source: String!, targets: [String!]!, direction: String): [ShortestPathOutput!]! } diff --git a/raphtory-graphql/src/model/plugins/algorithms.rs b/raphtory-graphql/src/model/plugins/algorithms.rs index 4d16c96a2d..83ed192d1b 100644 --- a/raphtory-graphql/src/model/plugins/algorithms.rs +++ b/raphtory-graphql/src/model/plugins/algorithms.rs @@ -9,7 +9,7 @@ use itertools::Itertools; use ordered_float::OrderedFloat; use raphtory::{ algorithms::{ - centrality::pagerank::unweighted_page_rank, + centrality::pagerank::page_rank, pathing::dijkstra::dijkstra_single_source_shortest_paths, }, prelude::NodeViewOps, @@ -70,6 +70,7 @@ impl<'a> Operation<'a, GraphAlgorithmPlugin> for Pagerank { ("iterCount", TypeRef::named_nn(TypeRef::INT)), // _nn stands for not null ("threads", TypeRef::named(TypeRef::INT)), // this one though might be null ("tol", TypeRef::named(TypeRef::FLOAT)), + ("weight", TypeRef::named(TypeRef::STRING)), ] } @@ -96,8 +97,10 @@ fn apply_pagerank<'b>( .get("damping_factor") .map(|v| v.f64()) .transpose()?; - let binding = unweighted_page_rank( + let weight = ctx.args.get("weight").map(|v| v.string()).transpose()?; + let binding = page_rank( &entry_point.graph, + weight.as_deref(), Some(iter_count), threads, tol, diff --git a/raphtory/src/algorithms/centrality/pagerank.rs b/raphtory/src/algorithms/centrality/pagerank.rs index cf762530d5..e31c32a9ff 100644 --- a/raphtory/src/algorithms/centrality/pagerank.rs +++ b/raphtory/src/algorithms/centrality/pagerank.rs @@ -3,7 +3,7 @@ use crate::{ db::{ api::{ state::NodeState, - view::{NodeViewOps, StaticGraphViewOps}, + view::{EdgeViewOps, NodeViewOps, StaticGraphViewOps}, }, task::{ context::Context, @@ -11,21 +11,22 @@ use crate::{ task_runner::TaskRunner, }, }, - prelude::GraphViewOps, + prelude::{GraphViewOps, PropertiesOps}, }; use num_traits::abs; +use raphtory_api::core::entities::properties::prop::PropUnwrap; #[derive(Clone, Debug, Default)] struct PageRankState { score: f64, - out_degree: usize, + weighted_out_degree: f64, } impl PageRankState { fn new(num_nodes: usize) -> Self { Self { score: 1f64 / num_nodes as f64, - out_degree: 0, + weighted_out_degree: 0f64, } } @@ -40,6 +41,7 @@ impl PageRankState { /// # Arguments /// /// - `g`: A GraphView object +/// - `weight`: Edge property key to use as weight. If None, all edges have weight 1.0. /// - `iter_count`: Number of iterations to run the algorithm for /// - `threads`: Number of threads to use for parallel execution /// - `tol`: The tolerance value for convergence @@ -50,8 +52,9 @@ impl PageRankState { /// /// An [AlgorithmResult] object containing the mapping from node ID to the PageRank score of the node /// -pub fn unweighted_page_rank( +pub fn page_rank( g: &G, + weight: Option<&str>, iter_count: Option, threads: Option, tol: Option, @@ -76,38 +79,53 @@ pub fn unweighted_page_rank( ctx.global_agg_reset(total_sink_contribution); - let step1 = ATask::new(move |s| { - let out_degree = s.out_degree(); - let state: &mut PageRankState = s.get_mut(); - state.out_degree = out_degree; - Step::Continue + let weight_key: Option = weight.map(|s| s.to_string()); + + let step1 = ATask::new({ + let weight_key = weight_key.clone(); + move |s| { + let weighted_out_degree = s.out_edges().iter().fold(0.0f64, |acc, edge| { + weight_key + .as_ref() + .and_then(|key| edge.properties().get(key)) + .and_then(|p| p.as_f64()) + .unwrap_or(1.0) + + acc + }); + let state: &mut PageRankState = s.get_mut(); + state.weighted_out_degree = weighted_out_degree; + Step::Continue + } }); let step2: ATask = ATask::new(move |s| { - // reset score { let state: &mut PageRankState = s.get_mut(); state.reset(); } - for t in s.in_neighbours() { - let prev = t.prev(); - - s.get_mut().score += prev.score / prev.out_degree as f64; + for edge in s.in_edges() { + let w = weight_key + .as_ref() + .and_then(|key| edge.properties().get(key)) + .and_then(|p| p.as_f64()) + .unwrap_or(1.0); + let nbr = edge.nbr(); + let prev = nbr.prev(); + if prev.weighted_out_degree > 0.0 { + s.get_mut().score += prev.score * w / prev.weighted_out_degree; + } } s.get_mut().score *= damp; - s.get_mut().score += teleport_prob; Step::Continue }); let step3 = ATask::new(move |s| { let state: &mut PageRankState = s.get_mut(); - - if state.out_degree == 0 { + if state.weighted_out_degree == 0.0 { let curr = s.prev().score; - let ts_contrib = factor * curr; s.global_update(&total_sink_contribution, ts_contrib); } diff --git a/raphtory/src/python/packages/algorithms.rs b/raphtory/src/python/packages/algorithms.rs index 3670dc440b..29ba8da700 100644 --- a/raphtory/src/python/packages/algorithms.rs +++ b/raphtory/src/python/packages/algorithms.rs @@ -6,7 +6,7 @@ use crate::{ centrality::{ betweenness::betweenness_centrality as betweenness_rs, degree_centrality::degree_centrality as degree_centrality_rs, hits::hits as hits_rs, - pagerank::unweighted_page_rank, + pagerank::page_rank, }, community_detection::{ label_propagation::label_propagation as label_propagation_rs, @@ -268,20 +268,23 @@ pub fn out_component( /// is less than the max diff value given. /// use_l2_norm (bool): Flag for choosing the norm to use for convergence checks, True for l2 norm, False for l1 norm. Defaults to True. /// damping_factor (float): The damping factor for the PageRank calculation. Defaults to 0.85. +/// weight (Optional[str]): Edge property key to use as weight. If None, all edges have weight 1.0. /// /// Returns: /// NodeStateF64: Mapping of nodes to their pagerank value. #[pyfunction] -#[pyo3(signature = (graph, iter_count=20, max_diff=None, use_l2_norm=true, damping_factor=0.85))] +#[pyo3(signature = (graph, iter_count=20, max_diff=None, use_l2_norm=true, damping_factor=0.85, weight=None))] pub fn pagerank( graph: &PyGraphView, iter_count: usize, max_diff: Option, use_l2_norm: bool, damping_factor: Option, + weight: Option<&str>, ) -> NodeState<'static, f64, DynamicGraph> { - unweighted_page_rank( + page_rank( &graph.graph, + weight, Some(iter_count), None, max_diff, diff --git a/raphtory/tests/algo_tests/centrality.rs b/raphtory/tests/algo_tests/centrality.rs index 2d3f5c71e5..5999321429 100644 --- a/raphtory/tests/algo_tests/centrality.rs +++ b/raphtory/tests/algo_tests/centrality.rs @@ -4,7 +4,7 @@ use itertools::Itertools; use raphtory::{ algorithms::centrality::{ betweenness::betweenness_centrality, degree_centrality::degree_centrality, hits::hits, - pagerank::unweighted_page_rank, + pagerank::page_rank, }, prelude::*, test_storage, @@ -144,7 +144,7 @@ fn test_page_rank() { } test_storage!(&graph, |graph| { - let results = unweighted_page_rank(graph, Some(1000), Some(1), None, true, None); + let results = page_rank(graph, None, Some(1000), Some(1), None, true, None); assert_eq_f64(results.get_by_node("1"), Some(&0.38694), 5); assert_eq_f64(results.get_by_node("2"), Some(&0.20195), 5); @@ -188,7 +188,7 @@ fn motif_page_rank() { } test_storage!(&graph, |graph| { - let results = unweighted_page_rank(graph, Some(1000), Some(4), None, true, None); + let results = page_rank(graph, None, Some(1000), Some(4), None, true, None); assert_eq_f64(results.get_by_node("10"), Some(&0.072082), 5); assert_eq_f64(results.get_by_node("8"), Some(&0.136473), 5); @@ -215,7 +215,7 @@ fn two_nodes_page_rank() { } test_storage!(&graph, |graph| { - let results = unweighted_page_rank(graph, Some(1000), Some(4), None, false, None); + let results = page_rank(graph, None, Some(1000), Some(4), None, false, None); assert_eq_f64(results.get_by_node("1"), Some(&0.5), 3); assert_eq_f64(results.get_by_node("2"), Some(&0.5), 3); @@ -233,7 +233,7 @@ fn three_nodes_page_rank_one_dangling() { } test_storage!(&graph, |graph| { - let results = unweighted_page_rank(graph, Some(10), Some(4), None, false, None); + let results = page_rank(graph, None, Some(10), Some(4), None, false, None); assert_eq_f64(results.get_by_node("1"), Some(&0.303), 3); assert_eq_f64(results.get_by_node("2"), Some(&0.393), 3); @@ -270,7 +270,7 @@ fn dangling_page_rank() { graph.add_edge(t, src, dst, NO_PROPS, None).unwrap(); } test_storage!(&graph, |graph| { - let results = unweighted_page_rank(graph, Some(1000), Some(4), None, true, None); + let results = page_rank(graph, None, Some(1000), Some(4), None, true, None); assert_eq_f64(results.get_by_node("1"), Some(&0.055), 3); assert_eq_f64(results.get_by_node("2"), Some(&0.079), 3); @@ -286,6 +286,124 @@ fn dangling_page_rank() { }); } +#[test] +fn page_rank_non_uniform_weights() { + let graph = Graph::new(); + graph + .add_edge(0, 1, 2, [("weight", Prop::F64(0.37))], None) + .unwrap(); + graph + .add_edge(0, 1, 3, [("weight", Prop::F64(4.2))], None) + .unwrap(); + graph + .add_edge(0, 2, 1, [("weight", Prop::F64(0.9))], None) + .unwrap(); + graph + .add_edge(0, 2, 4, [("weight", Prop::F64(1.7))], None) + .unwrap(); + graph + .add_edge(0, 3, 1, [("weight", Prop::F64(2.6))], None) + .unwrap(); + graph + .add_edge(0, 3, 2, [("weight", Prop::F64(0.05))], None) + .unwrap(); + graph + .add_edge(0, 4, 3, [("weight", Prop::F64(3.3))], None) + .unwrap(); + graph + .add_edge(0, 4, 1, [("weight", Prop::F64(0.8))], None) + .unwrap(); + + test_storage!(&graph, |graph| { + let results = + page_rank(graph, Some("weight"), Some(1000), Some(1), Some(1e-10), true, None); + + assert_eq_f64(results.get_by_node("1"), Some(&0.42499), 5); + assert_eq_f64(results.get_by_node("2"), Some(&0.07353), 5); + assert_eq_f64(results.get_by_node("3"), Some(&0.42311), 5); + assert_eq_f64(results.get_by_node("4"), Some(&0.07837), 5); + }); +} + +#[test] +fn page_rank_dangling_weighted() { + let graph = Graph::new(); + graph + .add_edge(0, 1, 2, [("weight", Prop::F64(0.12))], None) + .unwrap(); + graph + .add_edge(0, 1, 3, [("weight", Prop::F64(7.1))], None) + .unwrap(); + graph + .add_edge(0, 2, 4, [("weight", Prop::F64(0.004))], None) + .unwrap(); + graph + .add_edge(0, 3, 1, [("weight", Prop::F64(1.9))], None) + .unwrap(); + graph + .add_edge(0, 3, 5, [("weight", Prop::F64(0.63))], None) + .unwrap(); + + test_storage!(&graph, |graph| { + let results = + page_rank(graph, Some("weight"), Some(1000), Some(1), Some(1e-10), true, None); + + assert_eq_f64(results.get_by_node("1"), Some(&0.28736), 5); + assert_eq_f64(results.get_by_node("2"), Some(&0.08587), 5); + assert_eq_f64(results.get_by_node("3"), Some(&0.32201), 5); + assert_eq_f64(results.get_by_node("4"), Some(&0.15480), 5); + assert_eq_f64(results.get_by_node("5"), Some(&0.14997), 5); + }); +} + +#[test] +fn page_rank_uniform_weights_match_unweighted() { + let graph = Graph::new(); + let edges = vec![(1, 2), (1, 4), (2, 3), (3, 1), (4, 1)]; + for (src, dst) in edges { + graph + .add_edge(0, src, dst, [("weight", Prop::F64(1.0))], None) + .unwrap(); + } + + test_storage!(&graph, |graph| { + let unweighted = page_rank(graph, None, Some(1000), Some(1), None, true, None); + let weighted = + page_rank(graph, Some("weight"), Some(1000), Some(1), None, true, None); + + for node in ["1", "2", "3", "4"] { + assert_eq_f64( + weighted.get_by_node(node), + unweighted.get_by_node(node), + 5, + ); + } + }); +} + +#[test] +fn page_rank_missing_property_defaults_to_unweighted() { + let graph = Graph::new(); + let edges = vec![(1, 2), (1, 4), (2, 3), (3, 1), (4, 1)]; + for (src, dst) in edges { + graph.add_edge(0, src, dst, NO_PROPS, None).unwrap(); + } + + test_storage!(&graph, |graph| { + let unweighted = page_rank(graph, None, Some(1000), Some(1), None, true, None); + let weighted = + page_rank(graph, Some("weight"), Some(1000), Some(1), None, true, None); + + for node in ["1", "2", "3", "4"] { + assert_eq_f64( + weighted.get_by_node(node), + unweighted.get_by_node(node), + 5, + ); + } + }); +} + pub fn assert_eq_f64 + PartialEq + std::fmt::Debug>( a: Option, b: Option,