4 changes: 3 additions & 1 deletion python/python/raphtory/algorithms/__init__.pyi
@@ -703,15 +703,17 @@ def louvain(
resolution: float = 1.0,
weight_prop: str | None = None,
tol: None | float = None,
modularity: Literal["configuration", "constant"] = "configuration",
) -> NodeStateUsize:
"""
Louvain algorithm for community detection
Louvain algorithm for community detection with a configurable modularity function

Arguments:
graph (GraphView): the graph view
resolution (float): the resolution parameter for modularity. Defaults to 1.0.
weight_prop (str | None): the edge property to use for weights (has to be float)
tol (None | float): the floating point tolerance for deciding if improvements are significant (default: 1e-8)
modularity (Literal["configuration", "constant"]): the modularity function to use. Defaults to "configuration".

Returns:
NodeStateUsize: Mapping of nodes to their community assignment
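For reference, a minimal usage sketch of the new `modularity` argument; the graph `g` and the edge property name "weight" are assumptions for illustration and are not part of this diff:

    from raphtory.algorithms import louvain

    # `g` is assumed to be an existing raphtory Graph with a float edge property "weight"
    communities = louvain(g, resolution=1.0, weight_prop="weight", modularity="constant")
    # the result maps each node to its community assignment
    print(communities)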
22 changes: 7 additions & 15 deletions python/python/raphtory/vectors/__init__.pyi
@@ -23,19 +23,19 @@ import networkx as nx # type: ignore
import pyvis # type: ignore
from raphtory.iterables import *

__all__ = ['VectorisedGraph', 'Document', 'Embedding', 'VectorSelection']
class VectorisedGraph(object):
"""VectorisedGraph object that contains embedded documents that correspond to graph entities."""
__all__ = ["VectorisedGraph", "Document", "Embedding", "VectorSelection"]

class VectorisedGraph(object):
"""VectorisedGraph object that contains embedded documents that correspond to graph entities."""

def edges_by_similarity(
self,
query: str | list,
limit: int,
window: Optional[Tuple[int | str, int | str]] = None,
) -> VectorSelection:
"""
Search the top similarity scoring edges according to matching a specified `query` with no more than `limit` edges in the result.
Perform a similarity search between each edge's associated document and a specified `query`. Returns at most `limit` edges, ranked in descending order of similarity score.

Args:
query (str | list): The text or the embedding to score against.
@@ -56,7 +56,7 @@ class VectorisedGraph(object):
window: Optional[Tuple[int | str, int | str]] = None,
) -> VectorSelection:
"""
Search the top similarity scoring entities according to matching a specified `query` with no more than `limit` entities in the result.
Perform a similarity search between each entity's associated document and a specified `query`. Returns at most `limit` entities, ranked in descending order of similarity score.

Args:
query (str | list): The text or the embedding to score against.
@@ -74,7 +74,7 @@ class VectorisedGraph(object):
window: Optional[Tuple[int | str, int | str]] = None,
) -> VectorSelection:
"""
Search the top similarity scoring nodes according to matching a specified `query` with no more than `limit` nodes in the result.
Perform a similarity search between each node's associated document and a specified `query`. Returns at most `limit` nodes, ranked in descending order of similarity score.

Args:
query (str | list): The text or the embedding to score against.
@@ -86,15 +86,7 @@ class VectorisedGraph(object):
"""

class Document(object):
"""
A document corresponding to a graph entity. Used to generate embeddings.

Args:
content (str): The document content.
life (int | Tuple[int, int], optional): The optional lifespan of the document. A single value
corresponds to an event, a tuple corresponds to a
window.
"""
"""A document corresponding to a graph entity. Used to generate embeddings."""

def __repr__(self):
"""Return repr(self)."""
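For context, a minimal sketch of how the reworded similarity-search methods are called; `vg` is assumed to be an existing VectorisedGraph and the query string and window are illustrative only:

    # top 5 nodes whose documents score highest against the query
    nodes = vg.nodes_by_similarity("suspicious activity", limit=5)
    # top 5 edges, restricted to an assumed time window
    edges = vg.edges_by_similarity("suspicious activity", limit=5, window=(0, 1000))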
298 changes: 298 additions & 0 deletions raphtory/src/algorithms/community_detection/modularity.rs
@@ -48,6 +48,22 @@ impl<C: Into<ComID>> FromIterator<C> for Partition {
}

impl Partition {
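/// Build a partition from an explicit list of communities, where each inner `Vec<VID>` lists the nodes of one community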
pub fn from_coms(coms: Vec<Vec<VID>>) -> Self {
let num_nodes: usize = coms.iter().map(|com| com.len()).sum();
let mut node_to_com = vec![ComID(0); num_nodes];
let mut com_to_nodes = Vec::with_capacity(coms.len());
for (i, com) in coms.into_iter().enumerate() {
let com: HashSet<VID> = com.into_iter().map(|v| v.into()).collect();
for v in com.iter() {
node_to_com[v.index()] = ComID(i);
}
com_to_nodes.push(com);
}
Self {
node_to_com,
com_to_nodes,
}
}
/// Initialise all-singleton partition (i.e., each node in its own community)
pub fn new_singletons(n: usize) -> Self {
let node_to_com = (0..n).map(ComID).collect();
@@ -125,6 +141,37 @@ impl Partition {
old_to_new,
)
}

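/// Shannon entropy (in bits) of the distribution of nodes over communities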
pub fn entropy(&self) -> f64 {
let mut value = 0.0;
let total_count = self.num_nodes() as f64;
for (_, com) in self.coms() {
let count = com.len();
if count > 0 {
let p = count as f64 / total_count;
value += p * p.log2();
}
}
-value
}

/// Compute the normalised mutual information between this partition and another partition, in bits
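///
/// This is NMI(X, Y) = 2 * I(X; Y) / (H(X) + H(Y)), where I(X; Y) is the mutual information of the
/// two community assignments and H is the entropy computed by `entropy`, both using base-2 logarithms.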
pub fn nmi(&self, other: &Partition) -> f64 {
let total_count = self.num_nodes() as f64;
let mut value = 0.0;
for (_, com_i) in self.coms() {
for (j, com_j) in other.coms() {
let p_ij =
(com_i.iter().filter(|&v| other.com(v) == j).count() as f64) / total_count;
if p_ij > 0.0 {
let p_i = (com_i.len() as f64) / total_count;
let p_j = (com_j.len() as f64) / total_count;
value += p_ij * (p_ij / (p_i * p_j)).log2();
}
}
}
2.0 * value / (self.entropy() + other.entropy())
}
}

pub trait ModularityFunction {
@@ -409,3 +456,254 @@ impl ModularityFunction for ModularityUnDir {
Box::new((0..self.partition.num_nodes()).map(VID))
}
}

/// Constant Potts model modularity from https://arxiv.org/pdf/1104.3083
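///
/// In contrast to the configuration-model modularity, the null model here does not depend on node
/// degrees: the move gains below optimise a quality of the form
/// Q = sum_c e_c / m2 - resolution * (sum_c n_c^2) / n_tot^2,
/// where e_c is the edge weight accumulated inside community c (as tracked in `adj_com`), m2 is the
/// total weight over `adj`, n_c is the aggregated size of community c and n_tot is the total node count.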
pub struct ConstModularity {
resolution: f64,
partition: Partition,
adj: Vec<Vec<(VID, f64)>>,
self_loops: Vec<f64>,
adj_com: Vec<HashMap<ComID, f64>>,
n: Vec<i64>,
n_com: Vec<i64>,
n_tot: i64,
m2: f64,
tol: f64,
}

impl ModularityFunction for ConstModularity {
fn new<'graph, G: GraphViewOps<'graph>>(
graph: G,
weight_prop: Option<&str>,
resolution: f64,
partition: Partition,
tol: f64,
) -> Self {
let num_nodes = graph.count_nodes();
let n = vec![1; num_nodes];
let nodes = graph.nodes();
let local_id_map: HashMap<_, _> =
nodes.iter().enumerate().map(|(i, n)| (n, VID(i))).collect();
let adj: Vec<_> = nodes
.iter()
.map(|node| {
node.edges()
.iter()
.filter(|e| e.dst() != e.src())
.map(|e| {
let w = weight_prop
.map(|w| e.properties().get(w).unwrap_f64())
.unwrap_or(1.0);
let dst_id = local_id_map[&e.nbr().cloned()];
(dst_id, w)
})
.filter(|(_, w)| w >= &tol)
.collect::<Vec<_>>()
})
.collect();
let self_loops: Vec<_> = graph
.nodes()
.iter()
.map(|node| {
graph
.edge(node.node, node.node)
.map(|e| {
weight_prop
.map(|w| e.properties().get(w).unwrap_f64())
.unwrap_or(1.0)
})
.filter(|w| w >= &tol)
.unwrap_or(0.0)
})
.collect();
let m2: f64 = adj
.iter()
.flat_map(|neighbours| neighbours.iter().map(|(_, w)| w))
.sum();
let adj_com: Vec<_> = adj
.iter()
.enumerate()
.map(|(index, neighbours)| {
let mut com_neighbours = HashMap::new();
for (n, w) in neighbours {
com_neighbours
.entry(partition.com(n))
.and_modify(|old_w| *old_w += *w)
.or_insert(*w);
}
if self_loops[index] != 0.0 {
*com_neighbours
.entry(partition.com(&VID(index)))
.or_insert(0.0) += self_loops[index];
}
com_neighbours
})
.collect();

let n_com = partition.coms().map(|(_, com)| com.len() as i64).collect();
Self {
partition,
adj,
self_loops,
adj_com,
resolution,
n,
n_com,
n_tot: num_nodes as i64,
m2,
tol,
}
}

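/// Change in the constant Potts quality if `node` moves to `new_com`: `a` is the resulting change
/// in internal edge weight and `p` the change in the sum of squared community sizes.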
fn move_delta(&self, node: &VID, new_com: ComID) -> f64 {
let old_com = self.partition.com(node);
if old_com == new_com {
0.0
} else {
let a = 2.0
* (self.adj_com[node.index()].get(&new_com).unwrap_or(&0.0)
- self.adj_com[node.index()].get(&old_com).unwrap_or(&0.0)
+ self.self_loops[node.index()]);
let p = 2
* (self.n[node.index()]
* (self.n_com[new_com.index()] - self.n_com[old_com.index()])
+ self.n[node.index()].pow(2));

(a / self.m2 - self.resolution * p as f64 / self.n_tot.pow(2) as f64)
}
}

fn move_node(&mut self, node: &VID, new_com: ComID) {
let old_com = self.partition.com(node);
if old_com != new_com {
let w_self = self.self_loops[node.index()];
match self.adj_com[node.index()]
.entry(old_com)
.and_modify(|v| *v -= w_self)
{
Entry::Occupied(v) => {
if *v.get() < self.tol {
v.remove();
}
}
_ => {
// should only be possible for small values due to tolerance above
debug_assert!(w_self < self.tol)
}
}
if w_self != 0.0 {
*self.adj_com[node.index()].entry(new_com).or_insert(0.0) += w_self;
}

for (n, w) in &self.adj[node.index()] {
match self.adj_com[n.index()]
.entry(old_com)
.and_modify(|v| *v -= w)
{
Entry::Occupied(v) => {
if *v.get() < self.tol {
v.remove();
}
}
_ => {
// should only be possible for small values due to tolerance above
debug_assert!(*w < self.tol)
}
}
match self.adj_com[node.index()]
.entry(self.partition.com(n))
.and_modify(|v| *v -= w)
{
Entry::Occupied(v) => {
if *v.get() < self.tol {
v.remove();
}
}
_ => {
// should only be possible for small values due to tolerance above
debug_assert!(*w < self.tol)
}
}
*self.adj_com[n.index()].entry(new_com).or_insert(0.0) += w;
*self.adj_com[node.index()]
.entry(self.partition.com(n))
.or_insert(0.0) += w;
}
self.n_com[old_com.index()] -= self.n[node.index()];
self.n_com[new_com.index()] += self.n[node.index()];
}
self.partition.move_node(node, new_com);
}

fn candidate_moves(&self, node: &VID) -> Box<dyn Iterator<Item = ComID> + '_> {
Box::new(self.adj_com[node.index()].keys().copied())
}

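/// Collapse each community into one aggregated node, carry the community sizes over in `n`, and
/// reset the internal state to a singleton partition over the aggregated nodes.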
fn aggregate(&mut self) -> Partition {
let old_partition = mem::take(&mut self.partition);
let (new_partition, new_to_old, old_to_new) = old_partition.compact();
let adj_com: Vec<_> = new_partition
.coms()
.map(|(_c_new, com)| {
let mut neighbours = HashMap::new();
for n in com {
for (c_old, w) in &self.adj_com[n.index()] {
*neighbours.entry(old_to_new[c_old]).or_insert(0.0) += w;
}
}
neighbours
})
.collect();
let adj: Vec<_> = adj_com
.iter()
.enumerate()
.map(|(index, neighbours)| {
neighbours
.iter()
.filter(|(ComID(c), _)| c != &index)
.map(|(ComID(index), w)| (VID(*index), *w))
.collect::<Vec<_>>()
})
.collect();
let self_loops: Vec<_> = adj_com
.iter()
.enumerate()
.map(|(index, neighbours)| neighbours.get(&ComID(index)).copied().unwrap_or(0.0))
.collect();
let n: Vec<_> = new_to_old
.into_iter()
.map(|ComID(index)| self.n_com[index])
.collect();
let n_com = n.clone();
let partition = Partition::new_singletons(new_partition.num_coms());
self.adj = adj;
self.adj_com = adj_com;
self.self_loops = self_loops;
self.n = n;
self.n_com = n_com;
self.partition = partition;
new_partition
}

fn value(&self) -> f64 {
let e: f64 = self
.partition
.coms()
.map(|(cid, com)| {
com.iter()
.flat_map(|n| self.adj_com[n.index()].get(&cid))
.sum::<f64>()
})
.sum();
let k: i64 = self.n_com.iter().map(|n| n.pow(2)).sum();
e / self.m2 - k as f64 / self.n_tot.pow(2) as f64
}

fn partition(&self) -> &Partition {
&self.partition
}

fn nodes(&self) -> Box<dyn Iterator<Item = VID>> {
Box::new((0..self.partition.num_nodes()).map(VID))
}
}