Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ networks/weblinks
LOCAL/
networks/example.py
*old/
sanity_checks.ipynb
*.ipynb
nohup.out
undirect*
Summary/
Summary/BackboneStats.tex

# New stuff for now
new_networks/
Figures/

# Run scripts
script*
Expand Down
5 changes: 3 additions & 2 deletions 00-pre-process.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,15 +80,16 @@ def read_gpickle(file_name):
if weight_type in 'proximity':

print('Prox -> Dist')
P_dict = nx.get_edge_attributes(G, weight_attr)
P_dict = dict(nx.get_edge_attributes(G, weight_attr))
D_dict = {key: prox2dist(value) for key, value in P_dict.items()}
nx.set_edge_attributes(G, name='distance', values=D_dict)

if weight_type == 'distance':

D_dict = nx.get_edge_attributes(G, name=weight_attr)
D_dict = dict(nx.get_edge_attributes(G, name=weight_attr))
P_dict = {key: dist2prox(value) for key, value in D_dict.items()}

print(P_dict.values())

if (min(P_dict.values()) < 0) or (max(P_dict.values()) > 1.0):
raise TypeError("Proximity values not in [0,1]")
Expand Down
10 changes: 5 additions & 5 deletions 01-calc-backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# Files
wGgraphml = 'networks/{folder:s}/backbone.graphml'.format(folder=folder)
wFdistortion = 'networks/{folder:s}/distortion.pickle'.format(folder=folder)
wFasymmetry = 'networks/{folder:s}/asymmetry.pickle'.format(folder=folder)
#wFasymmetry = 'networks/{folder:s}/asymmetry.pickle'.format(folder=folder)

# Load Network
rGfile = 'networks/{folder:s}/network.graphml'.format(folder=folder)
Expand All @@ -65,7 +65,7 @@
#
# Asymmetry distribution
#
alpha = get_asymmetry_distribution(G)
#alpha = get_asymmetry_distribution(G)

# Dictionary of distortion distribution
distortion_dist = dict()
Expand All @@ -85,13 +85,13 @@
print('--- Exporting Formats ---')
ensurePathExists(wGgraphml)
ensurePathExists(wFdistortion)
ensurePathExists(wFasymmetry)
#ensurePathExists(wFasymmetry)

print('> Backbone')
nx.write_graphml(G, wGgraphml)
print('> Distortion')
pk.dump(distortion_dist, open(wFdistortion, 'wb'))
print('> Asymmetry')
pk.dump(alpha, open(wFasymmetry, 'wb'))
#print('> Asymmetry')
#pk.dump(alpha, open(wFasymmetry, 'wb'))

print('\n\n')
63 changes: 38 additions & 25 deletions 02-network-stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,54 +43,67 @@
rGfile = 'networks/{folder:s}/network.graphml'.format(folder=folder)
rBfile = 'networks/{folder:s}/backbone.graphml'.format(folder=folder)
wGstats = 'networks/{folder:s}/network-stats.csv'.format(folder=folder)
wFasymmetry = 'networks/{folder:s}/backbone_asymmetry.pickle'.format(folder=folder)
#wFasymmetry = 'networks/{folder:s}/backbone_asymmetry.pickle'.format(folder=folder)

# Load graph
G = nx.read_graphml(rGfile)

# Calculate stats
n_nodes = G.number_of_nodes()
n_edges = G.number_of_edges()
wcc_nodes = G.number_of_nodes()
wcc_edges = G.number_of_edges()

density = nx.density(G)

LSCC = G.subgraph(max(nx.strongly_connected_components(G), key=len))

lscc_nodes = LSCC.number_of_nodes()
lscc_edges = LSCC.number_of_edges()

# Load backbone
# Metric
G = nx.read_graphml(rBfile)
n_edges_metric = G.number_of_edges()

# New asymmetry dist
alpha = dict()
alpha['metric'] = get_asymmetry_distribution(G)
# Ultrametric
edges2remove = [(i, j) for i, j, d in G.edges(data=True) if 'ultrametric' not in d]
G.remove_edges_from(edges2remove)
n_edges_ultrametric = G.number_of_edges()
alpha['ultrametric'] = get_asymmetry_distribution(G)
# Metric
tau_wcc_metric = G.number_of_edges()/wcc_edges
# Ultrametric AND LSCC
tau_wcc_ultrametric = 0 #sum([int(d) for _, _, d in G.edges(data='ultrametric')])/wcc_edges
tau_lscc_metric = 0
tau_lscc_ultrametric = 0
for u, v, ultra in G.edges(data='ultrametric'):
if LSCC.has_edge(u, v):
tau_lscc_metric += 1
if ultra:
tau_wcc_ultrametric += 1
if LSCC.has_edge(u, v):
tau_lscc_ultrametric += 1

tau_wcc_ultrametric /= wcc_edges
if lscc_edges > 0.0:
tau_lscc_ultrametric /= lscc_edges
tau_lscc_metric /= lscc_edges

# to Result Series
sR = pd.Series({
'n-nodes': n_nodes,
'n-edges': n_edges,
'n-nodes': wcc_nodes,
'n-edges': wcc_edges,
#
'density': density,
#
'n-edges-metric': n_edges_metric,
'n-edges-ultrametric': n_edges_ultrametric,
'LSCC-nodes': lscc_nodes,
'LSCC-edges': lscc_edges,
#
'%-edges-metric': (n_edges_metric / n_edges),
'%-edges-ultrametric': (n_edges_ultrametric / n_edges),
'tau-metric': tau_wcc_metric,
'tau-ultrametric': tau_wcc_ultrametric,
#
'%-redundancy-metric': 1 - (n_edges_metric / n_edges),
'%-redundancy-ultrametric': 1 - (n_edges_ultrametric / n_edges),
'LSCC-tau-metric': tau_lscc_metric,
'LSCC-tau-ultrametric': tau_lscc_ultrametric,
#
'%-edges-ultrametric/metric': ((n_edges_ultrametric / n_edges) / (n_edges_metric / n_edges)),
'ultrametric_metric_ratio': (tau_wcc_ultrametric/tau_wcc_metric),
'LSCC-ultrametric_metric_ratio': (tau_lscc_ultrametric/tau_lscc_metric if tau_lscc_metric > 0 else 0),
#
}, name=network, dtype='object')

# Print
print(sR)
sR.to_csv(wGstats)
print('> Asymmetry')
pk.dump(alpha, open(wFasymmetry, 'wb'))
#print('> Asymmetry')
#pk.dump(alpha, open(wFasymmetry, 'wb'))
print("\n\n")
31 changes: 15 additions & 16 deletions 20-undirected-version.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,32 +47,31 @@
print("Loading network: {network:s}".format(network=network))
G = nx.read_graphml(rGraphml)

# Select largest connected component
G = G.subgraph(max(nx.strongly_connected_components(G), key=len))
nx.set_edge_attributes(G, values=None, name='alpha')

U = {'min': nx.Graph(), 'max': nx.Graph(), 'avg': nx.Graph()}
for g in U.values():
g.add_nodes_from(G.nodes())
U = nx.Graph()
U.add_nodes_from(G.nodes())

for u, v, w in G.edges(data=True):
if w['alpha'] == None:
G[u][v]['alpha'] = 0.0
pij = w['proximity']
if G.has_edge(v, u):
G[v][u]['alpha'] = 0.0
pji = G[v][u]['proximity']
pmin = min(pij, pji)
U['min'].add_edge(u, v, distance=prox2dist(pmin))
else:
pji = 0

pmax = max(pij, pji)
pavg = 0.5*(pij+pji)

U['max'].add_edge(u, v, distance=prox2dist(pmax))
U['avg'].add_edge(u, v, distance=prox2dist(pavg))

din = G[u][v]['distance']
dout = G[v][u]['distance']

U.add_edge(u, v, avg_distance=0.5*(din + dout), max_distance=max(din, dout))

components = []
for c in nx.connected_components(U):
if len(c) > 2:
components.append((G.subgraph(c).copy(), U.subgraph(c).copy()))

#nx.write_graphml(U, wGraphml)
pk.dump(U, open(wGraphml, 'wb'))
pk.dump(components, open(wGraphml, 'wb'))
print("Done")


48 changes: 26 additions & 22 deletions 21-calc-backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,37 +48,41 @@
weight_attr = settings.get('weight-attr')

# Files
wGgraphml = 'networks/{folder:s}/undirected_backbones.pickle'.format(folder=folder)
rGfile = 'networks/{folder:s}/undirected_networks.pickle'.format(folder=folder)
wFdistortion = 'networks/{folder:s}/undirected_distortions.pickle'.format(folder=folder)
wGstats = 'networks/{folder:s}/undirected_networks-stats.csv'.format(folder=folder)

# Load Network
print("Loading network: {network:s}".format(network=network))
rGfile = 'networks/{folder:s}/undirected_networks.pickle'.format(folder=folder)
G = pk.load(open(rGfile, 'rb'))
components = pk.load(open(rGfile, 'rb'))


# Dictionary of distortion distribution
distortion_dist = {'min': dict(), 'max': dict(), 'avg': dict()}
df = pd.DataFrame(columns=['n-nodes', 'nd-edges', 'nu-edges', 'tau-metric', 'tau-ultrametric',
'tau-avg-metric', 'tau-avg-ultrametric', 'tau-max-metric', 'tau-max-ultrametric'], index=range(len(components)))

for type in ['min', 'max', 'avg']:
print(type)
#
# Metric computation
#
G[type], s_values = dc.backbone(G[type], weight='distance', kind='metric', distortion=True)
distortion_dist[type]['metric'] = s_values
#
# Ultrametric computation
#
U, s_values = dc.backbone(G[type], weight='distance', kind='ultrametric', distortion=True)
distortion_dist[type]['ultrametric'] = s_values
nx.set_edge_attributes(G[type], name='ultrametric', values={(u, v): U.has_edge(u, v) for u, v in G[type].edges()})
single_s = {'metric': dict(), 'ultrametric': dict(), 'avg-metric': dict(),
'avg-ultrametric': dict(), 'max-metric': dict(), 'max-ultrametric': dict()}

s_values = [single_s.copy() for _ in range(len(components))]

for idx, (D, U) in enumerate(components):
df['n-nodes'][idx] = D.number_of_nodes()
df['nd-edges'][idx] = D.number_of_edges()
df['nu-edges'][idx] = U.number_of_edges()

for kind in ['metric', 'ultrametric']:
B, s_values[idx][kind] = dc.backbone(D, weight='distance', kind=kind, distortion=True)
df[f'tau-{kind}'][idx] = B.number_of_edges()/df['nd-edges'][idx]
for utype in ['avg', 'max']:
B, s_values[idx][f'{utype}-{kind}'] = dc.backbone(U, weight=f'{utype}_distance', kind=kind, distortion=True)
df[f'tau-{utype}-{kind}'][idx] = B.number_of_edges()/df['nu-edges'][idx]


print('--- Exporting Formats ---')
ensurePathExists(wGgraphml)
ensurePathExists(wFdistortion)

print('> Backbone')
pk.dump(G, open(wGgraphml, 'wb'))
print('> Backbone Statistics')
df.to_csv(wGstats)
print('> Distortion')
pk.dump(distortion_dist, open(wFdistortion, 'wb'))
pk.dump(s_values, open(wFdistortion, 'wb'))
print('\n')
3 changes: 1 addition & 2 deletions 22-backbone-comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@

for idx, network in enumerate(networks):
print(network)
settings = config[network]
folder = settings.get('folder')
folder = config[network].get('folder')

dfM.loc[idx, 'name'] = network
dfU.loc[idx, 'name'] = network
Expand Down
93 changes: 93 additions & 0 deletions 22-network-stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# coding=utf-8
# Author: Rion B Correia & Felipe Xavier Costa
# Date: Feb 22, 2023
#
# Description: Reads the undirected networks/backbones for one network and
#   computes backbone size statistics (node/edge counts, density, and the
#   fraction tau of edges kept by the metric and ultrametric backbones),
#   exporting them as a CSV.
#
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
#pd.options.display.float_format = '{:.2%}'.format
import networkx as nx
import argparse
import configparser
from utils import get_asymmetry_distribution
import pickle as pk


if __name__ == '__main__':

    #
    # Init
    #
    config = configparser.ConfigParser()
    config.read('networks.ini')
    # Skip configparser's implicit DEFAULT section at index 0.
    networks = list(config.keys())[1:]

    #
    # Args
    #
    parser = argparse.ArgumentParser()
    parser.add_argument("--network", default='bike-sharing', type=str, choices=networks, help="Network name.")
    args = parser.parse_args()
    #
    network = args.network
    #
    settings = config[network]
    folder = settings.get('folder')

    # Files
    rGfile = 'networks/{folder:s}/undirected_networks.pickle'.format(folder=folder)
    rBfile = 'networks/{folder:s}/undirected_backbones.pickle'.format(folder=folder)
    wGstats = 'networks/{folder:s}/undirected_networks-stats.csv'.format(folder=folder)
    #wFasymmetry = 'networks/{folder:s}/backbone_asymmetry.pickle'.format(folder=folder)

    # Load graphs — presumably dicts of nx.Graph keyed by merge type
    # ('min'/'max'/'avg'/'harm'); TODO confirm against the producing script.
    # Use context managers so the file handles are closed deterministically.
    with open(rGfile, 'rb') as fh:
        G = pk.load(fh)
    with open(rBfile, 'rb') as fh:
        B = pk.load(fh)

    df = pd.DataFrame(columns=['n-nodes', 'n-edges', 'density', 'tau-edges-metric', 'tau-edges-ultrametric'],
                      index=['min', 'max', 'avg', 'harm'])

    def _fill_stats(mtype, g, b):
        """Record size/density stats and backbone edge fractions (tau) for one merge type.

        g is the undirected network, b its backbone; rows are written in-place
        into the enclosing `df`. tau is 0.0 when the network has no edges.
        """
        n_edges = g.number_of_edges()
        # .at avoids the chained-indexing assignment (df['col'][row] = ...),
        # which pandas deprecates and which may silently write to a copy.
        df.at[mtype, 'n-nodes'] = g.number_of_nodes()
        df.at[mtype, 'n-edges'] = n_edges
        df.at[mtype, 'density'] = nx.density(g)
        if n_edges > 0:
            df.at[mtype, 'tau-edges-metric'] = b.number_of_edges() / n_edges
            # Edges flagged 'ultrametric' are a subset of the backbone edges.
            df.at[mtype, 'tau-edges-ultrametric'] = sum(int(d) for _, _, d in b.edges(data='ultrametric')) / n_edges
        else:
            df.at[mtype, 'tau-edges-metric'] = 0.0
            df.at[mtype, 'tau-edges-ultrametric'] = 0.0

    # Min and Harm can be computed on the entire network.
    # (renamed loop variable: `type` shadowed the builtin)
    for mtype in ['min', 'harm']:
        print(mtype)
        _fill_stats(mtype, G[mtype], B[mtype])

    # Max and Avg should be restricted to the LSCC of the directed network.
    rDfile = 'networks/{folder:s}/network.graphml'.format(folder=folder)
    D = nx.read_graphml(rDfile)
    lscc_nodes = max(nx.strongly_connected_components(D), key=len)
    for mtype in ['max', 'avg']:
        print(mtype)
        _fill_stats(mtype, G[mtype].subgraph(lscc_nodes), B[mtype].subgraph(lscc_nodes))

    # Print & export
    print(df)
    df.to_csv(wGstats)
    print("\n\n")
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

Source code for the Directed Distance Backbone project.

The networks considered here have a 'distance' edge attribute by construction.

### Scripts functionalities:
| Starts with | Functionality |
|-------------|-------------------------|
| 0 | Specific model measures |
| 1 | All models analysis |
| 2 | Undirected analysis |
| 2 | Undirected analysis |
Loading