diff --git a/.gitignore b/.gitignore index 4f40673..4a80fb5 100644 --- a/.gitignore +++ b/.gitignore @@ -12,13 +12,13 @@ networks/weblinks LOCAL/ networks/example.py *old/ -sanity_checks.ipynb +*.ipynb nohup.out -undirect* -Summary/ +Summary/BackboneStats.tex # New stuff for now new_networks/ +Figures/ # Run scripts script* diff --git a/00-pre-process.py b/00-pre-process.py index fc6077a..bbe63cb 100644 --- a/00-pre-process.py +++ b/00-pre-process.py @@ -80,15 +80,16 @@ def read_gpickle(file_name): if weight_type in 'proximity': print('Prox -> Dist') - P_dict = nx.get_edge_attributes(G, weight_attr) + P_dict = dict(nx.get_edge_attributes(G, weight_attr)) D_dict = {key: prox2dist(value) for key, value in P_dict.items()} nx.set_edge_attributes(G, name='distance', values=D_dict) if weight_type == 'distance': - D_dict = nx.get_edge_attributes(G, name=weight_attr) + D_dict = dict(nx.get_edge_attributes(G, name=weight_attr)) P_dict = {key: dist2prox(value) for key, value in D_dict.items()} + print(P_dict.values()) if (min(P_dict.values()) < 0) or (max(P_dict.values()) > 1.0): raise TypeError("Proximity values not in [0,1]") diff --git a/01-calc-backbone.py b/01-calc-backbone.py index ee0e721..0468bd4 100644 --- a/01-calc-backbone.py +++ b/01-calc-backbone.py @@ -53,7 +53,7 @@ # Files wGgraphml = 'networks/{folder:s}/backbone.graphml'.format(folder=folder) wFdistortion = 'networks/{folder:s}/distortion.pickle'.format(folder=folder) - wFasymmetry = 'networks/{folder:s}/asymmetry.pickle'.format(folder=folder) + #wFasymmetry = 'networks/{folder:s}/asymmetry.pickle'.format(folder=folder) # Load Network rGfile = 'networks/{folder:s}/network.graphml'.format(folder=folder) @@ -65,7 +65,7 @@ # # Asymmetry distribution # - alpha = get_asymmetry_distribution(G) + #alpha = get_asymmetry_distribution(G) # Dictionary of distortion distribution distortion_dist = dict() @@ -85,13 +85,13 @@ print('--- Exporting Formats ---') ensurePathExists(wGgraphml) ensurePathExists(wFdistortion) - 
ensurePathExists(wFasymmetry) + #ensurePathExists(wFasymmetry) print('> Backbone') nx.write_graphml(G, wGgraphml) print('> Distortion') pk.dump(distortion_dist, open(wFdistortion, 'wb')) - print('> Asymmetry') - pk.dump(alpha, open(wFasymmetry, 'wb')) + #print('> Asymmetry') + #pk.dump(alpha, open(wFasymmetry, 'wb')) print('\n\n') diff --git a/02-network-stats.py b/02-network-stats.py index 8d7c4ca..3ff554a 100644 --- a/02-network-stats.py +++ b/02-network-stats.py @@ -43,54 +43,67 @@ rGfile = 'networks/{folder:s}/network.graphml'.format(folder=folder) rBfile = 'networks/{folder:s}/backbone.graphml'.format(folder=folder) wGstats = 'networks/{folder:s}/network-stats.csv'.format(folder=folder) - wFasymmetry = 'networks/{folder:s}/backbone_asymmetry.pickle'.format(folder=folder) + #wFasymmetry = 'networks/{folder:s}/backbone_asymmetry.pickle'.format(folder=folder) # Load graph G = nx.read_graphml(rGfile) # Calculate stats - n_nodes = G.number_of_nodes() - n_edges = G.number_of_edges() + wcc_nodes = G.number_of_nodes() + wcc_edges = G.number_of_edges() density = nx.density(G) + + LSCC = G.subgraph(max(nx.strongly_connected_components(G), key=len)) + + lscc_nodes = LSCC.number_of_nodes() + lscc_edges = LSCC.number_of_edges() # Load backbone - # Metric G = nx.read_graphml(rBfile) - n_edges_metric = G.number_of_edges() - - # New asymmetry dist - alpha = dict() - alpha['metric'] = get_asymmetry_distribution(G) - # Ultrametric - edges2remove = [(i, j) for i, j, d in G.edges(data=True) if 'ultrametric' not in d] - G.remove_edges_from(edges2remove) - n_edges_ultrametric = G.number_of_edges() - alpha['ultrametric'] = get_asymmetry_distribution(G) + # Metric + tau_wcc_metric = G.number_of_edges()/wcc_edges + # Ultrametric AND LSCC + tau_wcc_ultrametric = 0 #sum([int(d) for _, _, d in G.edges(data='ultrametric')])/wcc_edges + tau_lscc_metric = 0 + tau_lscc_ultrametric = 0 + for u, v, ultra in G.edges(data='ultrametric'): + if LSCC.has_edge(u, v): + tau_lscc_metric += 1 + if 
ultra: + tau_wcc_ultrametric += 1 + if LSCC.has_edge(u, v): + tau_lscc_ultrametric += 1 + + tau_wcc_ultrametric /= wcc_edges + if lscc_edges > 0.0: + tau_lscc_ultrametric /= lscc_edges + tau_lscc_metric /= lscc_edges # to Result Series sR = pd.Series({ - 'n-nodes': n_nodes, - 'n-edges': n_edges, + 'n-nodes': wcc_nodes, + 'n-edges': wcc_edges, # 'density': density, # - 'n-edges-metric': n_edges_metric, - 'n-edges-ultrametric': n_edges_ultrametric, + 'LSCC-nodes': lscc_nodes, + 'LSCC-edges': lscc_edges, # - '%-edges-metric': (n_edges_metric / n_edges), - '%-edges-ultrametric': (n_edges_ultrametric / n_edges), + 'tau-metric': tau_wcc_metric, + 'tau-ultrametric': tau_wcc_ultrametric, # - '%-redundancy-metric': 1 - (n_edges_metric / n_edges), - '%-redundancy-ultrametric': 1 - (n_edges_ultrametric / n_edges), + 'LSCC-tau-metric': tau_lscc_metric, + 'LSCC-tau-ultrametric': tau_lscc_ultrametric, # - '%-edges-ultrametric/metric': ((n_edges_ultrametric / n_edges) / (n_edges_metric / n_edges)), + 'ultrametric_metric_ratio': (tau_wcc_ultrametric/tau_wcc_metric), + 'LSCC-ultrametric_metric_ratio': (tau_lscc_ultrametric/tau_lscc_metric if tau_lscc_metric > 0 else 0), # }, name=network, dtype='object') # Print print(sR) sR.to_csv(wGstats) - print('> Asymmetry') - pk.dump(alpha, open(wFasymmetry, 'wb')) + #print('> Asymmetry') + #pk.dump(alpha, open(wFasymmetry, 'wb')) print("\n\n") diff --git a/20-undirected-version.py b/20-undirected-version.py index 8794bc8..b996473 100644 --- a/20-undirected-version.py +++ b/20-undirected-version.py @@ -47,32 +47,31 @@ print("Loading network: {network:s}".format(network=network)) G = nx.read_graphml(rGraphml) + # Select largest connected component + G = G.subgraph(max(nx.strongly_connected_components(G), key=len)) nx.set_edge_attributes(G, values=None, name='alpha') - U = {'min': nx.Graph(), 'max': nx.Graph(), 'avg': nx.Graph()} - for g in U.values(): - g.add_nodes_from(G.nodes()) + U = nx.Graph() + U.add_nodes_from(G.nodes()) for u, v, w in 
G.edges(data=True): if w['alpha'] == None: G[u][v]['alpha'] = 0.0 - pij = w['proximity'] if G.has_edge(v, u): G[v][u]['alpha'] = 0.0 - pji = G[v][u]['proximity'] - pmin = min(pij, pji) - U['min'].add_edge(u, v, distance=prox2dist(pmin)) - else: - pji = 0 - - pmax = max(pij, pji) - pavg = 0.5*(pij+pji) - - U['max'].add_edge(u, v, distance=prox2dist(pmax)) - U['avg'].add_edge(u, v, distance=prox2dist(pavg)) + + din = G[u][v]['distance'] + dout = G[v][u]['distance'] + + U.add_edge(u, v, avg_distance=0.5*(din + dout), max_distance=max(din, dout)) + + components = [] + for c in nx.connected_components(U): + if len(c) > 2: + components.append((G.subgraph(c).copy(), U.subgraph(c).copy())) #nx.write_graphml(U, wGraphml) - pk.dump(U, open(wGraphml, 'wb')) + pk.dump(components, open(wGraphml, 'wb')) print("Done") \ No newline at end of file diff --git a/21-calc-backbone.py b/21-calc-backbone.py index d7561cc..9577aa4 100644 --- a/21-calc-backbone.py +++ b/21-calc-backbone.py @@ -48,37 +48,41 @@ weight_attr = settings.get('weight-attr') # Files - wGgraphml = 'networks/{folder:s}/undirected_backbones.pickle'.format(folder=folder) + rGfile = 'networks/{folder:s}/undirected_networks.pickle'.format(folder=folder) wFdistortion = 'networks/{folder:s}/undirected_distortions.pickle'.format(folder=folder) + wGstats = 'networks/{folder:s}/undirected_networks-stats.csv'.format(folder=folder) # Load Network print("Loading network: {network:s}".format(network=network)) - rGfile = 'networks/{folder:s}/undirected_networks.pickle'.format(folder=folder) - G = pk.load(open(rGfile, 'rb')) + components = pk.load(open(rGfile, 'rb')) + - # Dictionary of distortion distribution - distortion_dist = {'min': dict(), 'max': dict(), 'avg': dict()} + df = pd.DataFrame(columns=['n-nodes', 'nd-edges', 'nu-edges', 'tau-metric', 'tau-ultrametric', + 'tau-avg-metric', 'tau-avg-ultrametric', 'tau-max-metric', 'tau-max-ultrametric'], index=range(len(components))) - for type in ['min', 'max', 'avg']: - 
print(type) - # - # Metric computation - # - G[type], s_values = dc.backbone(G[type], weight='distance', kind='metric', distortion=True) - distortion_dist[type]['metric'] = s_values - # - # Ultrametric computation - # - U, s_values = dc.backbone(G[type], weight='distance', kind='ultrametric', distortion=True) - distortion_dist[type]['ultrametric'] = s_values - nx.set_edge_attributes(G[type], name='ultrametric', values={(u, v): U.has_edge(u, v) for u, v in G[type].edges()}) + single_s = {'metric': dict(), 'ultrametric': dict(), 'avg-metric': dict(), + 'avg-ultrametric': dict(), 'max-metric': dict(), 'max-ultrametric': dict()} + + s_values = [single_s.copy() for _ in range(len(components))] + + for idx, (D, U) in enumerate(components): + df['n-nodes'][idx] = D.number_of_nodes() + df['nd-edges'][idx] = D.number_of_edges() + df['nu-edges'][idx] = U.number_of_edges() + + for kind in ['metric', 'ultrametric']: + B, s_values[idx][kind] = dc.backbone(D, weight='distance', kind=kind, distortion=True) + df[f'tau-{kind}'][idx] = B.number_of_edges()/df['nd-edges'][idx] + for utype in ['avg', 'max']: + B, s_values[idx][f'{utype}-{kind}'] = dc.backbone(U, weight=f'{utype}_distance', kind=kind, distortion=True) + df[f'tau-{utype}-{kind}'][idx] = B.number_of_edges()/df['nu-edges'][idx] + print('--- Exporting Formats ---') - ensurePathExists(wGgraphml) ensurePathExists(wFdistortion) - print('> Backbone') - pk.dump(G, open(wGgraphml, 'wb')) + print('> Backbone Statistics') + df.to_csv(wGstats) print('> Distortion') - pk.dump(distortion_dist, open(wFdistortion, 'wb')) + pk.dump(s_values, open(wFdistortion, 'wb')) print('\n') diff --git a/22-backbone-comparison.py b/22-backbone-comparison.py index ddccbd8..bb4c160 100644 --- a/22-backbone-comparison.py +++ b/22-backbone-comparison.py @@ -47,8 +47,7 @@ for idx, network in enumerate(networks): print(network) - settings = config[network] - folder = settings.get('folder') + folder = config[network].get('folder') dfM.loc[idx, 'name'] = 
network dfU.loc[idx, 'name'] = network diff --git a/22-network-stats.py b/22-network-stats.py new file mode 100644 index 0000000..e7b79c7 --- /dev/null +++ b/22-network-stats.py @@ -0,0 +1,93 @@ +# coding=utf-8 +# Author: Rion B Correia & Felipe Xavier Costa +# Date: Feb 22, 2023 +# +# Description: Reads a network and computes backbone size statistics. +# +# +import numpy as np +import pandas as pd +pd.set_option('display.max_rows', 100) +pd.set_option('display.max_columns', 500) +pd.set_option('display.width', 1000) +#pd.options.display.float_format = '{:.2%}'.format +import networkx as nx +import argparse +import configparser +from utils import get_asymmetry_distribution +import pickle as pk + + +if __name__ == '__main__': + + # + # Init + # + config = configparser.ConfigParser() + config.read('networks.ini') + networks = list(config.keys())[1:] + + # + # Args + # + parser = argparse.ArgumentParser() + parser.add_argument("--network", default='bike-sharing', type=str, choices=networks, help="Network name.") + args = parser.parse_args() + # + network = args.network + # + settings = config[network] + folder = settings.get('folder') + + # Files + rGfile = 'networks/{folder:s}/undirected_networks.pickle'.format(folder=folder) + rBfile = 'networks/{folder:s}/undirected_backbones.pickle'.format(folder=folder) + wGstats = 'networks/{folder:s}/undirected_networks-stats.csv'.format(folder=folder) + #wFasymmetry = 'networks/{folder:s}/backbone_asymmetry.pickle'.format(folder=folder) + + # Load graph + G = pk.load(open(rGfile, 'rb')) + B = pk.load(open(rBfile, 'rb')) + + df = pd.DataFrame(columns=['n-nodes', 'n-edges', 'density', 'tau-edges-metric','tau-edges-ultrametric'], + index=['min', 'max', 'avg', 'harm']) + + # Min and Harm can be done in entrie network + for type in ['min', 'harm']: + print(type) + df['n-nodes'][type] = G[type].number_of_nodes() + df['n-edges'][type] = G[type].number_of_edges() + df['density'][type] = nx.density(G[type]) + + if df['n-edges'][type] > 
0: + df['tau-edges-metric'][type] = B[type].number_of_edges()/df['n-edges'][type] + df['tau-edges-ultrametric'][type] = sum([int(d) for _, _, d in B[type].edges(data='ultrametric')])/df['n-edges'][type] + else: + df['tau-edges-metric'][type] = 0.0 + df['tau-edges-ultrametric'][type] = 0.0 + + # Max and Avg Should be in a subset of the LSCC + rDfile = 'networks/{folder:s}/network.graphml'.format(folder=folder) + D = nx.read_graphml(rDfile) + lscc_nodes = max(nx.strongly_connected_components(D), key=len) + for type in ['max', 'avg']: + print(type) + g = G[type].subgraph(lscc_nodes) + + df['n-nodes'][type] = g.number_of_nodes() + df['n-edges'][type] = g.number_of_edges() + df['density'][type] = nx.density(g) + + if df['n-edges'][type] > 0: + b = B[type].subgraph(lscc_nodes) + df['tau-edges-metric'][type] = b.number_of_edges()/df['n-edges'][type] + df['tau-edges-ultrametric'][type] = sum([int(d) for _, _, d in b.edges(data='ultrametric')])/df['n-edges'][type] + else: + df['tau-edges-metric'][type] = 0.0 + df['tau-edges-ultrametric'][type] = 0.0 + + + # Print + print(df) + df.to_csv(wGstats) + print("\n\n") diff --git a/README.md b/README.md index 8d73826..b4788c3 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,11 @@ Source codes for the Directed Distance Backbone project. +The considered networks have edge attributes 'distance' by construction. 
+ ### Scripts functionalities: | Starts with | Functionality | |-------------|-------------------------| | 0 | Specific model measures | | 1 | All models analysis | -| 2 | Undirected analysis | \ No newline at end of file +| 2 | Undirected analysis | diff --git a/Summary/BackboneCompareStats.csv b/Summary/BackboneCompareStats.csv new file mode 100644 index 0000000..f2a317b --- /dev/null +++ b/Summary/BackboneCompareStats.csv @@ -0,0 +1,21 @@ +,n-edges,n-min-edges,n-max-edges,%-metric,%-min-metric,%-max-metric,%-avg-metric,%-harm-metric,%-ultrametric,%-min-ultrametric,%-max-ultrametric,%-avg-ultrametric,%-harm-ultrametric +business-faculty,3432.0,2944,488,0.3508158508158508,0.1745923913043478,0.6290983606557377,0.6618852459016393,0.1864809782608695,0.0976107226107226,0.0431385869565217,0.1762295081967213,0.1721311475409836,0.0407608695652173 +cs-faculty,2929.0,2741,188,0.5148514851485149,0.3349142648668369,0.824468085106383,0.8776595744680851,0.3283473184968989,0.2266985319221577,0.1130974097044874,0.4787234042553192,0.4574468085106383,0.0981393651951842 +history-faculty,2428.0,2266,162,0.414332784184514,0.2436010591350397,0.7716049382716049,0.8148148148148148,0.2471315092674316,0.2199341021416803,0.1032656663724624,0.4259259259259259,0.3641975308641975,0.0966460723742277 +caviar-proj,295.0,205,90,0.7050847457627119,0.5902439024390244,0.7555555555555555,0.7444444444444445,0.5951219512195122,0.6474576271186441,0.5560975609756098,0.6888888888888889,0.6888888888888889,0.5317073170731708 +celegans-her,3500.0,2932,568,0.5568571428571428,0.3216234652114597,0.823943661971831,0.8433098591549296,0.3219645293315143,0.2677142857142857,0.114256480218281,0.4348591549295774,0.3943661971830985,0.1105047748976807 +celegans-male,3474.0,2832,642,0.5400115141047783,0.330861581920904,0.6947040498442367,0.7305295950155763,0.3343926553672316,0.2769142199194012,0.1228813559322034,0.397196261682243,0.381619937694704,0.1182909604519774 
+colombia-calls,438484.0,238586,199898,0.026014632232875,0.0097994014736824,0.0157030085343525,0.0151177100321163,0.0097281483406402,0.0089011229600167,0.003638101145918,0.0043172017729041,0.004312199221603,0.0036129529813149 +colombia-mobility,173857.0,106707,67150,0.0170887568518955,0.0130544387903324,0.0181831720029784,0.0180938198064035,0.0128576382055535,0.0121651702261053,0.0080875668887701,0.0128369322412509,0.0128369322412509,0.0080781954323521 +mobility-manizales,2518.0,1281,1237,0.2664813343923749,0.1194379391100702,0.2651576394502829,0.2497978981406629,0.1498829039812646,0.0766481334392374,0.0437158469945355,0.0452708164915117,0.0452708164915117,0.0437158469945355 +mobility-medellin,33884.0,18381,15503,0.2467241175776177,0.1514607475110168,0.1897697219892924,0.205379603947623,0.1702301289374898,0.0544209656475032,0.0245906098688863,0.0271560343159388,0.0266400051602915,0.0224144497034981 +tennis-loss,101436.0,85084,16352,0.5961985882724082,0.1592073715387146,0.5377323874755382,0.6114848336594912,0.1604296930092614,0.2377656847667494,0.090863147007663,0.0912426614481409,0.077727495107632,0.0866202811339382 +yeast-grn,1666106.0,1613628,52478,0.0637174345449809,0.0093491188799401,0.0983840847593277,0.0939250733640763,0.0091105260939944,0.0137824364116088,0.0039104428034218,0.0251915088227447,0.0233240596059301,0.0038930906008076 +bike-sharing,53118.0,35063,18055,0.5953160887081592,0.5589367709551379,0.6353918582110218,0.6767654389365827,0.3849071670992214,0.0275236266425693,0.0207626272709123,0.0400443090556632,0.0399889227360841,0.0206485469012919 +giraffe,30.0,15,15,0.7666666666666667,0.7333333333333333,0.8,0.8666666666666667,0.8,0.3333333333333333,0.3333333333333333,0.3333333333333333,0.3333333333333333,0.3333333333333333 +comorbidity,8930.0,4465,4465,0.4743561030235162,0.3659574468085106,0.5117581187010078,0.5068309070548712,0.4665173572228443,0.0217245240761478,0.0210526315789473,0.0210526315789473,0.0210526315789473,0.0210526315789473 
+phone-calls,609.0,430,179,0.916256157635468,0.8651162790697674,0.9497206703910616,0.9553072625698324,0.8511627906976744,0.8489326765188834,0.7465116279069768,0.9273743016759776,0.9273743016759776,0.7465116279069768 +us-airports,18906.0,11973,6933,0.2743044536126097,0.1382276789442913,0.2271743833838165,0.243473243905957,0.1440741668754698,0.1898339151592087,0.0900359141401486,0.1339968267705178,0.133852589066782,0.0897853503716695 +DDI,2966.0,1483,1483,0.5900202292650034,0.4895482130815913,0.4389750505731625,0.4598786244099798,0.4942683749157114,0.4049224544841537,0.2832097100472016,0.2784895482130816,0.2771409305461901,0.2771409305461901 +us-weblinks,505476.0,450539,54937,0.367809747643805,0.1606897516086287,0.5996141034275625,0.579208912026503,0.1607940711015028,0.2536678299266434,0.1127493957237886,0.4496059122267324,0.3865336658354115,0.1081992901835357 +host-pathogen,18529.0,18529,0,0.9985967942144746,0.6638242754600896,0.0,0.0,0.6638242754600896,0.9984348858546064,0.591451238598953,0.0,0.0,0.591451238598953 \ No newline at end of file diff --git a/Summary/BackboneCompareStats_Components.csv b/Summary/BackboneCompareStats_Components.csv new file mode 100644 index 0000000..e25691c --- /dev/null +++ b/Summary/BackboneCompareStats_Components.csv @@ -0,0 +1,148 @@ +,n-nodes,nd-edges,nu-edges,tau-metric,tau-ultrametric,tau-avg-metric,tau-avg-ultrametric,tau-max-metric,tau-max-ultrametric +business-faculty-0,84.0,2558.0,488.0,0.3924941360437842,0.0965598123534011,0.6618852459016393,0.1721311475409836,0.6290983606557377,0.1762295081967213 +cs-faculty-0,78.0,1180.0,179.0,0.5830508474576271,0.1932203389830508,0.8715083798882681,0.4301675977653631,0.8156424581005587,0.4525139664804469 +history-faculty-0,56.0,905.0,158.0,0.5348066298342542,0.2154696132596685,0.810126582278481,0.3481012658227848,0.7658227848101266,0.4113924050632911 
+caviar-proj-0,63.0,232.0,90.0,0.6594827586206896,0.5948275862068966,0.7444444444444445,0.6888888888888889,0.7555555555555555,0.6888888888888889 +celegans-her-0,215.0,2748.0,558.0,0.5673216885007278,0.2536390101892285,0.8405017921146953,0.3835125448028674,0.8207885304659498,0.4247311827956989 +celegans-her-1,3.0,5.0,2.0,1.0,0.8,1.0,1.0,1.0,1.0 +celegans-her-3,4.0,8.0,3.0,0.875,0.875,1.0,1.0,1.0,1.0 +celegans-male-0,238.0,2852.0,634.0,0.5157784011220197,0.2457924263674614,0.7271293375394322,0.3738170347003154,0.6908517350157729,0.389589905362776 +colombia-calls-0,863.0,438484.0,199898.0,0.026014632232875,0.0089011229600167,0.0151177100321163,0.004312199221603,0.0157030085343525,0.0043172017729041 +colombia-mobility-0,863.0,173857.0,67150.0,0.0170887568518955,0.0121651702261053,0.0180938198064035,0.0128369322412509,0.0181831720029784,0.0128369322412509 +mobility-manizales-0,57.0,2518.0,1237.0,0.2664813343923749,0.0766481334392374,0.2497978981406629,0.0452708164915117,0.2651576394502829,0.0452708164915117 +mobility-medellin-0,413.0,33884.0,15503.0,0.2467241175776177,0.0544209656475032,0.205379603947623,0.0266400051602915,0.1897697219892924,0.0271560343159388 +tennis-loss-0,1263.0,85842.0,16349.0,0.5327345588406608,0.1286549707602339,0.6114135421126674,0.0775582604440638,0.5376475625420515,0.0910759067832895 +yeast-grn-0,1225.0,366239.0,52478.0,0.0662518191672651,0.0154953459353045,0.0939250733640763,0.0233240596059301,0.0983840847593277,0.0251915088227447 +bike-sharing-0,723.0,53115.0,18055.0,0.5952932316671373,0.0274875270639179,0.6767654389365827,0.0399889227360841,0.6353918582110218,0.0400443090556632 +giraffe-0,6.0,30.0,15.0,0.7666666666666667,0.3333333333333333,0.8666666666666667,0.3333333333333333,0.8,0.3333333333333333 +comorbidity-0,95.0,8930.0,4465.0,0.4743561030235162,0.0217245240761478,0.5068309070548712,0.0210526315789473,0.5117581187010078,0.0210526315789473 
+phone-calls-1,30.0,75.0,33.0,0.84,0.7466666666666667,0.9393939393939394,0.8787878787878788,0.9393939393939394,0.8787878787878788 +phone-calls-2,9.0,19.0,9.0,1.0,0.8421052631578947,1.0,0.8888888888888888,1.0,0.8888888888888888 +phone-calls-3,24.0,64.0,26.0,0.8125,0.75,0.9615384615384616,0.8846153846153846,0.9230769230769232,0.8846153846153846 +phone-calls-4,6.0,12.0,6.0,0.8333333333333334,0.75,0.8333333333333334,0.8333333333333334,0.8333333333333334,0.8333333333333334 +phone-calls-6,8.0,16.0,7.0,0.9375,0.875,1.0,1.0,1.0,1.0 +phone-calls-7,7.0,14.0,6.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-airports-0,926.0,18593.0,6930.0,0.2654224708223525,0.1797988490292045,0.2431457431457431,0.1334776334776334,0.2268398268398268,0.1336219336219336 +DDI-0,412.0,2966.0,1483.0,0.5900202292650034,0.4049224544841537,0.4598786244099798,0.2771409305461901,0.4389750505731625,0.2784895482130816 +us-weblinks-0,18112.0,288838.0,49855.0,0.3465160401332234,0.2233743482505764,0.5658008223849162,0.3718583893290542,0.5850165479891686,0.43654598335172 +us-weblinks-1,273.0,1720.0,374.0,0.55,0.4860465116279069,0.7834224598930482,0.7272727272727273,0.8048128342245989,0.732620320855615 +us-weblinks-2,31.0,107.0,44.0,0.6074766355140186,0.5981308411214953,0.6818181818181818,0.6818181818181818,0.7045454545454546,0.6818181818181818 +us-weblinks-3,21.0,385.0,175.0,0.9974025974025974,0.8701298701298701,1.0,0.5257142857142857,1.0,0.8685714285714285 +us-weblinks-6,262.0,3343.0,551.0,0.3885731379000897,0.3281483697277894,0.515426497277677,0.4736842105263157,0.5172413793103449,0.4900181488203267 +us-weblinks-7,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-9,88.0,956.0,226.0,0.3085774058577405,0.256276150627615,0.4734513274336283,0.3849557522123893,0.4690265486725664,0.3849557522123893 +us-weblinks-10,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-13,278.0,3860.0,837.0,0.2821243523316062,0.2152849740932642,0.3691756272401433,0.3309438470728793,0.3703703703703703,0.3333333333333333 
+us-weblinks-14,18.0,70.0,34.0,0.4857142857142857,0.4857142857142857,0.5,0.5,0.5,0.5 +us-weblinks-15,9.0,70.0,34.0,1.0,1.0,1.0,0.2647058823529412,1.0,1.0 +us-weblinks-16,30.0,191.0,44.0,0.5968586387434555,0.450261780104712,0.8863636363636364,0.6590909090909091,0.8636363636363636,0.75 +us-weblinks-17,12.0,125.0,59.0,0.192,0.184,0.1864406779661017,0.1864406779661017,0.288135593220339,0.1864406779661017 +us-weblinks-18,9.0,44.0,14.0,0.8636363636363636,0.8409090909090909,0.8571428571428571,0.7142857142857143,0.8571428571428571,0.7857142857142857 +us-weblinks-22,19.0,325.0,154.0,0.9907692307692308,0.3015384615384615,1.0,0.1168831168831168,0.9935064935064936,0.1753246753246753 +us-weblinks-24,3.0,5.0,2.0,1.0,0.8,1.0,1.0,1.0,1.0 +us-weblinks-25,6.0,26.0,11.0,0.3846153846153846,0.3846153846153846,0.4545454545454545,0.4545454545454545,0.4545454545454545,0.4545454545454545 +us-weblinks-26,4.0,9.0,3.0,0.7777777777777778,0.7777777777777778,1.0,1.0,1.0,1.0 +us-weblinks-27,4.0,8.0,3.0,0.75,0.75,1.0,1.0,1.0,1.0 +us-weblinks-28,10.0,75.0,30.0,0.9866666666666668,0.4266666666666667,0.9666666666666668,0.3,1.0,0.3 +us-weblinks-30,4.0,8.0,4.0,0.75,0.75,0.75,0.75,0.75,0.75 +us-weblinks-34,3.0,5.0,2.0,0.6,0.6,1.0,1.0,1.0,1.0 +us-weblinks-36,3.0,6.0,3.0,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-40,3.0,6.0,3.0,1.0,1.0,1.0,0.6666666666666666,1.0,1.0 +us-weblinks-41,4.0,8.0,4.0,0.875,0.875,0.75,0.75,0.75,0.75 +us-weblinks-42,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-46,4.0,12.0,6.0,1.0,1.0,1.0,0.8333333333333334,1.0,1.0 +us-weblinks-47,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-50,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-51,14.0,54.0,21.0,0.7777777777777778,0.5370370370370371,0.9523809523809524,0.6190476190476191,0.9523809523809524,0.6666666666666666 +us-weblinks-52,4.0,9.0,4.0,0.7777777777777778,0.7777777777777778,0.75,0.75,0.75,0.75 +us-weblinks-57,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 
+us-weblinks-58,12.0,27.0,11.0,0.8518518518518519,0.8518518518518519,1.0,1.0,1.0,1.0 +us-weblinks-61,6.0,11.0,5.0,0.9090909090909092,0.9090909090909092,1.0,1.0,1.0,1.0 +us-weblinks-62,5.0,9.0,4.0,0.8888888888888888,0.8888888888888888,1.0,1.0,1.0,1.0 +us-weblinks-64,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-65,3.0,6.0,3.0,1.0,1.0,1.0,0.6666666666666666,1.0,1.0 +us-weblinks-68,4.0,11.0,5.0,0.5454545454545454,0.5454545454545454,0.6,0.6,0.6,0.6 +us-weblinks-69,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-70,30.0,715.0,290.0,0.558041958041958,0.2489510489510489,0.4206896551724138,0.1,0.7793103448275862,0.1689655172413793 +us-weblinks-74,4.0,10.0,4.0,0.6,0.6,0.75,0.75,0.75,0.75 +us-weblinks-75,8.0,56.0,28.0,0.25,0.25,0.25,0.25,0.3571428571428571,0.25 +us-weblinks-76,5.0,14.0,4.0,0.7142857142857143,0.6428571428571429,1.0,1.0,1.0,1.0 +us-weblinks-77,7.0,31.0,10.0,0.8709677419354839,0.4516129032258064,1.0,0.6,1.0,0.6 +us-weblinks-78,4.0,10.0,4.0,1.0,1.0,1.0,0.75,1.0,1.0 +us-weblinks-79,4.0,10.0,4.0,0.9,0.9,1.0,0.75,1.0,0.75 +us-weblinks-82,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-83,6.0,11.0,5.0,0.9090909090909092,0.9090909090909092,1.0,1.0,1.0,1.0 +us-weblinks-84,4.0,12.0,6.0,0.5,0.5,0.5,0.5,0.5,0.5 +us-weblinks-86,3.0,6.0,3.0,0.8333333333333334,0.8333333333333334,0.6666666666666666,0.6666666666666666,1.0,1.0 +us-weblinks-87,10.0,70.0,32.0,0.4,0.3285714285714285,0.59375,0.28125,0.59375,0.375 +us-weblinks-89,13.0,156.0,78.0,1.0,0.2243589743589743,1.0,0.1538461538461538,1.0,0.1538461538461538 +us-weblinks-90,6.0,30.0,15.0,0.6333333333333333,0.4,0.7333333333333333,0.3333333333333333,1.0,0.3333333333333333 +us-weblinks-91,3.0,6.0,3.0,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-93,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-95,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 
+us-weblinks-96,4.0,9.0,3.0,0.7777777777777778,0.6666666666666666,1.0,1.0,1.0,1.0 +us-weblinks-97,3.0,6.0,3.0,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-98,3.0,5.0,2.0,1.0,0.8,1.0,1.0,1.0,1.0 +us-weblinks-99,5.0,20.0,10.0,0.4,0.4,0.4,0.4,0.4,0.4 +us-weblinks-100,4.0,8.0,3.0,0.75,0.75,1.0,1.0,1.0,1.0 +us-weblinks-104,5.0,12.0,6.0,0.75,0.75,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-105,6.0,14.0,5.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-108,5.0,10.0,4.0,0.9,0.9,1.0,1.0,1.0,1.0 +us-weblinks-109,13.0,146.0,68.0,0.1712328767123287,0.1643835616438356,0.1911764705882352,0.1764705882352941,0.1911764705882352,0.1764705882352941 +us-weblinks-110,5.0,9.0,4.0,0.8888888888888888,0.8888888888888888,1.0,1.0,1.0,1.0 +us-weblinks-111,4.0,12.0,6.0,1.0,1.0,1.0,0.5,1.0,1.0 +us-weblinks-112,6.0,30.0,15.0,1.0,0.8666666666666667,1.0,0.3333333333333333,1.0,0.7333333333333333 +us-weblinks-120,5.0,20.0,10.0,0.45,0.4,0.5,0.4,0.5,0.4 +us-weblinks-121,4.0,8.0,3.0,0.75,0.75,1.0,1.0,1.0,1.0 +us-weblinks-122,6.0,30.0,15.0,1.0,0.5333333333333333,1.0,0.3333333333333333,1.0,0.4666666666666667 +us-weblinks-124,17.0,44.0,22.0,1.0,0.9545454545454546,1.0,0.9090909090909092,1.0,0.9090909090909092 +us-weblinks-126,6.0,14.0,5.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-127,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-130,5.0,10.0,4.0,0.9,0.9,1.0,1.0,1.0,1.0 +us-weblinks-132,3.0,6.0,3.0,0.8333333333333334,0.8333333333333334,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-133,10.0,25.0,9.0,1.0,0.88,1.0,1.0,1.0,1.0 +us-weblinks-136,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-137,3.0,6.0,3.0,0.8333333333333334,0.8333333333333334,1.0,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-138,4.0,12.0,6.0,0.5,0.5,0.5,0.5,0.5,0.5 
+us-weblinks-139,10.0,74.0,30.0,1.0,1.0,1.0,0.3,1.0,1.0 +us-weblinks-141,3.0,6.0,3.0,0.8333333333333334,0.8333333333333334,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-143,7.0,15.0,6.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-144,8.0,56.0,28.0,1.0,1.0,1.0,0.6428571428571429,1.0,1.0 +us-weblinks-146,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-147,3.0,5.0,2.0,1.0,0.8,1.0,1.0,1.0,1.0 +us-weblinks-148,6.0,20.0,7.0,0.55,0.55,0.7142857142857143,0.7142857142857143,0.7142857142857143,0.7142857142857143 +us-weblinks-149,8.0,16.0,7.0,1.0,0.875,1.0,1.0,1.0,1.0 +us-weblinks-150,8.0,20.0,8.0,0.95,0.95,0.875,0.875,0.875,0.875 +us-weblinks-152,8.0,16.0,7.0,0.9375,0.9375,1.0,1.0,1.0,1.0 +us-weblinks-153,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-154,7.0,42.0,21.0,1.0,0.7619047619047619,1.0,0.5714285714285714,1.0,0.5714285714285714 +us-weblinks-156,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-159,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-161,8.0,27.0,8.0,0.9629629629629628,0.925925925925926,1.0,0.875,1.0,0.875 +us-weblinks-162,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-166,3.0,6.0,3.0,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-168,9.0,72.0,36.0,1.0,0.2222222222222222,1.0,0.2222222222222222,1.0,0.2222222222222222 +us-weblinks-170,8.0,56.0,28.0,1.0,0.4642857142857143,1.0,0.4642857142857143,1.0,0.4642857142857143 +us-weblinks-171,3.0,6.0,3.0,0.8333333333333334,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-173,4.0,8.0,3.0,0.75,0.75,1.0,1.0,1.0,1.0 +us-weblinks-174,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-176,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-179,3.0,5.0,2.0,1.0,0.8,1.0,1.0,1.0,1.0 +us-weblinks-182,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 
+us-weblinks-185,4.0,11.0,5.0,0.9090909090909092,0.7272727272727273,0.8,0.6,0.8,0.6 +us-weblinks-187,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 +us-weblinks-190,5.0,11.0,5.0,0.7272727272727273,0.7272727272727273,0.8,0.8,0.8,0.8 +us-weblinks-195,6.0,13.0,5.0,0.9230769230769232,0.7692307692307693,1.0,1.0,1.0,1.0 +us-weblinks-196,3.0,6.0,3.0,1.0,1.0,1.0,0.6666666666666666,1.0,1.0 +us-weblinks-199,5.0,12.0,5.0,0.6666666666666666,0.6666666666666666,0.8,0.8,0.8,0.8 +us-weblinks-200,6.0,30.0,15.0,0.8666666666666667,0.6,1.0,0.3333333333333333,1.0,0.5333333333333333 +us-weblinks-201,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-202,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-204,4.0,10.0,4.0,1.0,1.0,1.0,0.75,1.0,1.0 +us-weblinks-205,3.0,6.0,3.0,1.0,0.6666666666666666,1.0,0.6666666666666666,1.0,0.6666666666666666 +us-weblinks-206,5.0,17.0,7.0,0.5882352941176471,0.5294117647058824,0.8571428571428571,0.5714285714285714,1.0,0.5714285714285714 +us-weblinks-209,3.0,6.0,3.0,0.8333333333333334,0.8333333333333334,0.6666666666666666,0.6666666666666666,0.6666666666666666,0.6666666666666666 +us-weblinks-211,4.0,8.0,4.0,0.75,0.75,0.75,0.75,0.75,0.75 +us-weblinks-212,3.0,5.0,2.0,0.8,0.8,1.0,1.0,1.0,1.0 +us-weblinks-213,5.0,13.0,5.0,0.9230769230769232,0.9230769230769232,1.0,0.8,1.0,1.0 +us-weblinks-214,4.0,7.0,3.0,0.8571428571428571,0.8571428571428571,1.0,1.0,1.0,1.0 diff --git a/Summary/BackboneCompareStats_LSCC.csv b/Summary/BackboneCompareStats_LSCC.csv new file mode 100644 index 0000000..9a12889 --- /dev/null +++ b/Summary/BackboneCompareStats_LSCC.csv @@ -0,0 +1,20 @@ +,n-nodes,n-edges,n-max-edges,tau-metric,tau-max-metric,tau-avg-metric,tau-ultrametric,tau-max-ultrametric,tau-avg-ultrametric +business-faculty,94.0,2842.0,488,0.3652357494722026,0.6290983606557377,0.6618852459016393,0.0946516537649542,0.1762295081967213,0.1721311475409836 
+cs-faculty,167.0,2384.0,188,0.5285234899328859,0.824468085106383,0.8776595744680851,0.2202181208053691,0.4787234042553192,0.4574468085106383 +history-faculty,116.0,1897.0,162,0.430152872957301,0.7716049382716049,0.8148148148148148,0.2087506589351608,0.4259259259259259,0.3641975308641975 +caviar-proj,66.0,242.0,90,0.6611570247933884,0.7555555555555555,0.7444444444444445,0.5991735537190083,0.6888888888888889,0.6888888888888889 +celegans-her,249.0,3046.0,568,0.5531845042678923,0.823943661971831,0.8433098591549296,0.2567301378857518,0.4348591549295774,0.3943661971830985 +celegans-male,289.0,3206.0,642,0.5343106674984405,0.6947040498442367,0.7305295950155763,0.2682470368059887,0.397196261682243,0.381619937694704 +colombia-calls,863.0,438484.0,199898,0.026014632232875,0.0157030085343525,0.0151177100321163,0.0089011229600167,0.0043172017729041,0.004312199221603 +colombia-mobility,863.0,173857.0,67150,0.0170887568518955,0.0181831720029784,0.0180938198064035,0.0121651702261053,0.0128369322412509,0.0128369322412509 +mobility-manizales,57.0,2518.0,1237,0.2664813343923749,0.2651576394502829,0.2497978981406629,0.0766481334392374,0.0452708164915117,0.0452708164915117 +mobility-medellin,413.0,33884.0,15503,0.2467241175776177,0.1897697219892924,0.205379603947623,0.0544209656475032,0.0271560343159388,0.0266400051602915 +tennis-loss,2428.0,98102.0,16352,0.5824753827648774,0.5377323874755382,0.6114848336594912,0.2118611241361032,0.0912426614481409,0.077727495107632 +yeast-grn,1229.0,367432.0,52478,0.0662244986827494,0.0983840847593277,0.0939250733640763,0.0154913017918961,0.0251915088227447,0.0233240596059301 +bike-sharing,723.0,53115.0,18055,0.5952932316671373,0.6353918582110218,0.6767654389365827,0.0274875270639179,0.0400443090556632,0.0399889227360841 +giraffe,6.0,30.0,15,0.7666666666666667,0.8,0.8666666666666667,0.3333333333333333,0.3333333333333333,0.3333333333333333 
+comorbidity,95.0,8930.0,4465,0.4743561030235162,0.5117581187010078,0.5068309070548712,0.0217245240761478,0.0210526315789473,0.0210526315789473 +phone-calls,114.0,274.0,97,0.8832116788321168,0.9484536082474226,0.9587628865979382,0.7737226277372263,0.9072164948453608,0.9072164948453608 +us-airports,995.0,18820.0,6932,0.2710945802337938,0.2270628967109059,0.2433641084824004,0.1862911795961742,0.133871898442008,0.1337276399307559 +DDI,412.0,2966.0,1483,0.5900202292650034,0.4389750505731625,0.4598786244099798,0.4049224544841537,0.2784895482130816,0.2771409305461901 +us-weblinks,27725.0,372626.0,54259,0.3800352095666969,0.5953850974031958,0.5753884148251903,0.2562837805198778,0.44735435595938,0.3844339187968816 diff --git a/Summary/BackboneCompareStats_LargestComponent.csv b/Summary/BackboneCompareStats_LargestComponent.csv new file mode 100644 index 0000000..3d6cded --- /dev/null +++ b/Summary/BackboneCompareStats_LargestComponent.csv @@ -0,0 +1,20 @@ +Network,n-nodes,nd-edges,nu-edges,tau-metric,tau-ultrametric,tau-avg-metric,tau-avg-ultrametric,tau-max-metric,tau-max-ultrametric +business-faculty,84.0,2558.0,488.0,0.3924941360437842,0.0965598123534011,0.6618852459016393,0.1721311475409836,0.6290983606557377,0.1762295081967213 +cs-faculty,78.0,1180.0,179.0,0.5830508474576271,0.1932203389830508,0.8715083798882681,0.4301675977653631,0.8156424581005587,0.4525139664804469 +history-faculty,56.0,905.0,158.0,0.5348066298342542,0.2154696132596685,0.810126582278481,0.3481012658227848,0.7658227848101266,0.4113924050632911 +caviar-proj,63.0,232.0,90.0,0.6594827586206896,0.5948275862068966,0.7444444444444445,0.6888888888888889,0.7555555555555555,0.6888888888888889 +celegans-her,215.0,2748.0,558.0,0.5673216885007278,0.2536390101892285,0.8405017921146953,0.3835125448028674,0.8207885304659498,0.4247311827956989 +celegans-male,238.0,2852.0,634.0,0.5157784011220197,0.2457924263674614,0.7271293375394322,0.3738170347003154,0.6908517350157729,0.389589905362776 
+colombia-calls,863.0,438484.0,199898.0,0.026014632232875,0.0089011229600167,0.0151177100321163,0.004312199221603,0.0157030085343525,0.0043172017729041 +colombia-mobility,863.0,173857.0,67150.0,0.0170887568518955,0.0121651702261053,0.0180938198064035,0.0128369322412509,0.0181831720029784,0.0128369322412509 +mobility-manizales,57.0,2518.0,1237.0,0.2664813343923749,0.0766481334392374,0.2497978981406629,0.0452708164915117,0.2651576394502829,0.0452708164915117 +mobility-medellin,413.0,33884.0,15503.0,0.2467241175776177,0.0544209656475032,0.205379603947623,0.0266400051602915,0.1897697219892924,0.0271560343159388 +tennis-loss,1263.0,85842.0,16349.0,0.5327345588406608,0.1286549707602339,0.6114135421126674,0.0775582604440638,0.5376475625420515,0.0910759067832895 +yeast-grn,1225.0,366239.0,52478.0,0.0662518191672651,0.0154953459353045,0.0939250733640763,0.0233240596059301,0.0983840847593277,0.0251915088227447 +bike-sharing,723.0,53115.0,18055.0,0.5952932316671373,0.0274875270639179,0.6767654389365827,0.0399889227360841,0.6353918582110218,0.0400443090556632 +giraffe,6.0,30.0,15.0,0.7666666666666667,0.3333333333333333,0.8666666666666667,0.3333333333333333,0.8,0.3333333333333333 +comorbidity,95.0,8930.0,4465.0,0.4743561030235162,0.0217245240761478,0.5068309070548712,0.0210526315789473,0.5117581187010078,0.0210526315789473 +phone-calls,30.0,75.0,33.0,0.84,0.7466666666666667,0.9393939393939394,0.8787878787878788,0.9393939393939394,0.8787878787878788 +us-airports,926.0,18593.0,6930.0,0.2654224708223525,0.1797988490292045,0.2431457431457431,0.1334776334776334,0.2268398268398268,0.1336219336219336 +DDI,412.0,2966.0,1483.0,0.5900202292650034,0.4049224544841537,0.4598786244099798,0.2771409305461901,0.4389750505731625,0.2784895482130816 +us-weblinks,18112.0,288838.0,49855.0,0.3465160401332234,0.2233743482505764,0.5658008223849162,0.3718583893290542,0.5850165479891686,0.43654598335172 diff --git a/Summary/BackboneCompareStats_WCC.csv b/Summary/BackboneCompareStats_WCC.csv new file mode 
100644 index 0000000..0c8b853 --- /dev/null +++ b/Summary/BackboneCompareStats_WCC.csv @@ -0,0 +1,21 @@ +,n-nodes,n-edges,n-min-edges,tau-metric,tau-min-metric,tau-harm-metric,tau-ultrametric,tau-min-ultrametric,tau-harm-ultrametric +business-faculty,113.0,3432.0,2944,0.3508158508158508,0.1745923913043478,0.1864809782608695,0.0976107226107226,0.0431385869565217,0.0407608695652173 +cs-faculty,206.0,2929.0,2741,0.5148514851485149,0.3349142648668369,0.3283473184968989,0.2266985319221577,0.1130974097044874,0.0981393651951842 +history-faculty,145.0,2428.0,2266,0.414332784184514,0.2436010591350397,0.2471315092674316,0.2199341021416803,0.1032656663724624,0.0966460723742277 +caviar-proj,110.0,295.0,205,0.7050847457627119,0.5902439024390244,0.5951219512195122,0.6474576271186441,0.5560975609756098,0.5317073170731708 +celegans-her,313.0,3500.0,2932,0.5568571428571428,0.3216234652114597,0.3219645293315143,0.2677142857142857,0.114256480218281,0.1105047748976807 +celegans-male,328.0,3474.0,2832,0.5400115141047783,0.330861581920904,0.3343926553672316,0.2769142199194012,0.1228813559322034,0.1182909604519774 +colombia-calls,863.0,438484.0,238586,0.026014632232875,0.0097994014736824,0.0097281483406402,0.0089011229600167,0.003638101145918,0.0036129529813149 +colombia-mobility,863.0,173857.0,106707,0.0170887568518955,0.0130544387903324,0.0128576382055535,0.0121651702261053,0.0080875668887701,0.0080781954323521 +mobility-manizales,57.0,2518.0,1281,0.2664813343923749,0.1194379391100702,0.1498829039812646,0.0766481334392374,0.0437158469945355,0.0437158469945355 +mobility-medellin,413.0,33884.0,18381,0.2467241175776177,0.1514607475110168,0.1702301289374898,0.0544209656475032,0.0245906098688863,0.0224144497034981 +tennis-loss,4245.0,101436.0,85084,0.5961985882724082,0.1592073715387146,0.1604296930092614,0.2377656847667494,0.090863147007663,0.0866202811339382 
+yeast-grn,6216.0,1666106.0,1613628,0.0637174345449809,0.0093491188799401,0.0091105260939944,0.0137824364116088,0.0039104428034218,0.0038930906008076 +bike-sharing,725.0,53118.0,35063,0.5953160887081592,0.5589367709551379,0.3849071670992214,0.0275236266425693,0.0207626272709123,0.0206485469012919 +giraffe,6.0,30.0,15,0.7666666666666667,0.7333333333333333,0.8,0.3333333333333333,0.3333333333333333,0.3333333333333333 +comorbidity,95.0,8930.0,4465,0.4743561030235162,0.3659574468085106,0.4665173572228443,0.0217245240761478,0.0210526315789473,0.0210526315789473 +phone-calls,322.0,609.0,430,0.916256157635468,0.8651162790697674,0.8511627906976744,0.8489326765188834,0.7465116279069768,0.7465116279069768 +us-airports,1075.0,18906.0,11973,0.2743044536126097,0.1382276789442913,0.1440741668754698,0.1898339151592087,0.0900359141401486,0.0897853503716695 +DDI,412.0,2966.0,1483,0.5900202292650034,0.4895482130815913,0.4942683749157114,0.4049224544841537,0.2832097100472016,0.2771409305461901 +us-weblinks,42800.0,505476.0,450539,0.367809747643805,0.1606897516086287,0.1607940711015028,0.2536678299266434,0.1127493957237886,0.1081992901835357 +host-pathogen,10578.0,18529.0,18529,0.9985967942144746,0.6638242754600896,0.6638242754600896,0.9984348858546064,0.591451238598953,0.591451238598953 diff --git a/Summary/BackboneStats.csv b/Summary/BackboneStats.csv new file mode 100644 index 0000000..21015ff --- /dev/null +++ b/Summary/BackboneStats.csv @@ -0,0 +1,21 @@ +,n-nodes,n-edges,density,tau-metric,tau-ultrametric,ultrametric_metric_ratio,LSCC-nodes,LSCC-edges,LSCC-tau-metric,LSCC-tau-ultrametric,LSCC-ultrametric_metric_ratio +business-faculty,113.0,3432.0,0.2711757269279393,0.3508158508158508,0.0976107226107226,0.2782392026578073,94.0,2842.0,0.3652357494722026,0.0946516537649542,0.2591522157996146 +cs-faculty,206.0,2929.0,0.0693582761070329,0.5148514851485149,0.2266985319221577,0.4403183023872679,167.0,2384.0,0.5285234899328859,0.2202181208053691,0.4166666666666667 
+history-faculty,145.0,2428.0,0.1162835249042145,0.414332784184514,0.2199341021416803,0.5308151093439363,116.0,1897.0,0.430152872957301,0.2087506589351608,0.4852941176470589 +caviar-proj,110.0,295.0,0.024603836530442,0.7050847457627119,0.6474576271186441,0.9182692307692308,66.0,242.0,0.6611570247933884,0.5991735537190083,0.90625 +celegans-her,313.0,3500.0,0.0358400917506348,0.5568571428571428,0.2677142857142857,0.4807593637762956,249.0,3046.0,0.5531845042678923,0.2567301378857518,0.4640949554896142 +celegans-male,328.0,3474.0,0.0323897963750279,0.5400115141047783,0.2769142199194012,0.5127931769722814,289.0,3206.0,0.5343106674984405,0.2682470368059887,0.5020431990659661 +colombia-calls,863.0,438484.0,0.5894346866405165,0.026014632232875,0.0089011229600167,0.3421583238362409,863.0,438484.0,0.026014632232875,0.0089011229600167,0.3421583238362409 +colombia-mobility,863.0,173857.0,0.233708291101295,0.0170887568518955,0.0121651702261053,0.7118815213732751,863.0,173857.0,0.0170887568518955,0.0121651702261053,0.7118815213732751 +mobility-manizales,57.0,2518.0,0.7888471177944862,0.2664813343923749,0.0766481334392374,0.2876304023845007,57.0,2518.0,0.2664813343923749,0.0766481334392374,0.2876304023845007 +mobility-medellin,413.0,33884.0,0.1991349114929829,0.2467241175776177,0.0544209656475032,0.2205741626794258,413.0,33884.0,0.2467241175776177,0.0544209656475032,0.2205741626794258 +tennis-loss,4245.0,101436.0,0.005630397351655,0.5961985882724082,0.2377656847667494,0.3988028308750578,2428.0,98102.0,0.5824753827648774,0.2118611241361032,0.3637254558818382 +yeast-grn,6216.0,1666106.0,0.0431271232156187,0.0637174345449809,0.0137824364116088,0.2163055764883195,1229.0,367432.0,0.0662244986827494,0.0154913017918961,0.2339210126166112 +bike-sharing,725.0,53118.0,0.1011964183654029,0.5953160887081592,0.0275236266425693,0.0462336348112073,723.0,53115.0,0.5952932316671373,0.0274875270639179,0.0461747683354944 
+giraffe,6.0,30.0,1.0,0.7666666666666667,0.3333333333333333,0.4347826086956521,6.0,30.0,0.7666666666666667,0.3333333333333333,0.4347826086956521 +comorbidity,95.0,8930.0,1.0,0.4743561030235162,0.0217245240761478,0.0457979225684608,95.0,8930.0,0.4743561030235162,0.0217245240761478,0.0457979225684608 +phone-calls,322.0,609.0,0.005891913856156,0.916256157635468,0.8489326765188834,0.9265232974910392,114.0,274.0,0.8832116788321168,0.7737226277372263,0.8760330578512397 +us-airports,1075.0,18906.0,0.016375211121216,0.2743044536126097,0.1898339151592087,0.692055534130351,995.0,18820.0,0.2710945802337938,0.1862911795961742,0.6871814974519795 +DDI,412.0,2966.0,0.0175158859518578,0.5900202292650034,0.4049224544841537,0.6862857142857143,412.0,2966.0,0.5900202292650034,0.4049224544841537,0.6862857142857143 +us-weblinks,42800.0,505476.0,0.0002759453939551,0.367809747643805,0.2536678299266434,0.6896713084730447,27725.0,372626.0,0.3800352095666969,0.2562837805198778,0.6743685165700404 +host-pathogen,10578.0,18529.0,0.0001656097548462,0.9985967942144746,0.9984348858546064,0.9998378641301412,1.0,0.0,0.0,0.0,0.0 diff --git a/Summary/CompareSize_LSCC_WCC.csv b/Summary/CompareSize_LSCC_WCC.csv new file mode 100644 index 0000000..2bfd37f --- /dev/null +++ b/Summary/CompareSize_LSCC_WCC.csv @@ -0,0 +1,20 @@ +Network,N_WCC,N_LSCC,Percent +bike-sharing,725,723,0.997241379310345 +business-faculty,113,84,0.743362831858407 +caviar-proj,110,63,0.572727272727273 +celegans-her,313,215,0.686900958466454 +celegans-male,328,238,0.725609756097561 +colombia-calls,863,863,1 +colombia-mobility,863,863,1 +comorbidity,95,95,1 +cs-faculty,206,78,0.378640776699029 +DDI,412,412,1 +giraffe,6,6,1 +history-faculty,145,56,0.386206896551724 +mobility-manizales,57,57,1 +mobility-medellin,413,413,1 +phone-calls,322,30,0.093167701863354 +tennis-loss,4245,1263,0.297526501766784 +us-airports,1075,926,0.861395348837209 +us-weblinks,42800,18112,0.423177570093458 +yeast-grn,6216,1225,0.197072072072072 diff --git 
a/combining_data.html b/combining_data.html new file mode 100644 index 0000000..557e674 --- /dev/null +++ b/combining_data.html @@ -0,0 +1,15984 @@ + + +
+ + +import networkx as nx
+import pickle as pk
+import configparser
+import matplotlib.pyplot as plt
+import pandas as pd
+import powerlaw
+import numpy as np
+config = configparser.ConfigParser()
+config.read('networks.ini')  # ConfigParser.read() skips files it cannot open instead of raising
+networks = list(config.keys())[1:]  # drop the first key: configparser always lists its DEFAULT section first
+'''
+df = pd.DataFrame(columns=['n-nodes', 'n-edges', 'density', 'tau-metric', 'tau-ultrametric', 'ultrametric_metric_ratio',
+'LSCC-nodes', 'LSCC-edges', 'LSCC-tau-metric', 'LSCC-tau-ultrametric', 'LSCC-ultrametric_metric_ratio'], index=networks)
+
+for network in networks:
+ folder = config[network].get('folder')
+
+ rGstats = f'networks/{folder}/network-stats.csv'
+ data = pd.read_csv(rGstats, index_col=0)
+
+ for col in df.columns:
+ df[col][network] = data[network][col]
+ #break
+
+#print(df.head())
+df.to_csv('Summary/BackboneStats.csv')
+'''
+"\ndf = pd.DataFrame(columns=['n-nodes', 'n-edges', 'density', 'tau-metric', 'tau-ultrametric', 'ultrametric_metric_ratio',\n'LSCC-nodes', 'LSCC-edges', 'LSCC-tau-metric', 'LSCC-tau-ultrametric', 'LSCC-ultrametric_metric_ratio'], index=networks)\n\nfor network in networks:\n folder = config[network].get('folder')\n \n rGstats = f'networks/{folder}/network-stats.csv'\n data = pd.read_csv(rGstats, index_col=0)\n \n for col in df.columns:\n df[col][network] = data[network][col] \n #break\n\n#print(df.head())\ndf.to_csv('Summary/BackboneStats.csv')\n"
+# Save to Latex
+'''
+df['Type'] = None
+Type = {'business-faculty': 'Social',
+ 'cs-faculty': 'Social',
+ 'history-faculty': 'Social',
+ 'caviar-proj': 'Social',
+ 'celegans-her': 'Biomedical',
+ 'celegans-male': 'Biomedical',
+ 'colombia-calls': 'Social',
+ 'colombia-mobility': 'Technological',
+ 'mobility-manizales': 'Technological',
+ 'mobility-medellin': 'Technological',
+ 'tennis-loss': 'Social',
+ 'yeast-grn': 'Biomedical',
+ 'bike-sharing': 'Technological',
+ 'giraffe': 'Social',
+ 'comorbidity': 'Biomedical',
+ 'phone-calls': 'Social',
+ 'us-airports': 'Technological',
+ 'DDI': 'Biomedical',
+ 'us-weblinks': 'Technological',
+ 'host-pathogen': 'Biomedical'}
+
+for network in networks:
+ df['Type'][network] = Type[network]
+
+df.reset_index(inplace=True)
+
+df = df[['Type', 'index', 'n-nodes', 'n-edges', 'density', 'tau-metric', 'tau-ultrametric', 'ultrametric_metric_ratio',
+ 'LSCC-nodes', 'LSCC-edges', 'LSCC-tau-metric', 'LSCC-tau-ultrametric', 'LSCC-ultrametric_metric_ratio']]
+
+df.sort_values(by=['Type']).to_latex('Summary/BackboneStats.tex', column_format='l|l|rrr|c|c|c|rr|c|c|c|', index_names=False)
+'''
+"\ndf['Type'] = None\nType = {'business-faculty': 'Social',\n 'cs-faculty': 'Social',\n 'history-faculty': 'Social',\n 'caviar-proj': 'Social',\n 'celegans-her': 'Biomedical',\n 'celegans-male': 'Biomedical',\n 'colombia-calls': 'Social',\n 'colombia-mobility': 'Technological',\n 'mobility-manizales': 'Technological',\n 'mobility-medellin': 'Technological',\n 'tennis-loss': 'Social',\n 'yeast-grn': 'Biomedical',\n 'bike-sharing': 'Technological',\n 'giraffe': 'Social',\n 'comorbidity': 'Biomedical',\n 'phone-calls': 'Social',\n 'us-airports': 'Technological',\n 'DDI': 'Biomedical',\n 'us-weblinks': 'Technological',\n 'host-pathogen': 'Biomedical'}\n\nfor network in networks:\n df['Type'][network] = Type[network]\n\ndf.reset_index(inplace=True)\n\ndf = df[['Type', 'index', 'n-nodes', 'n-edges', 'density', 'tau-metric', 'tau-ultrametric', 'ultrametric_metric_ratio',\n 'LSCC-nodes', 'LSCC-edges', 'LSCC-tau-metric', 'LSCC-tau-ultrametric', 'LSCC-ultrametric_metric_ratio']]\n\ndf.sort_values(by=['Type']).to_latex('Summary/BackboneStats.tex', column_format='l|l|rrr|c|c|c|rr|c|c|c|', index_names=False)\n"
+'''
+df_wcc = pd.DataFrame(columns=['n-nodes', 'n-edges', 'n-min-edges', 'tau-metric', 'tau-min-metric', 'tau-harm-metric',
+ 'tau-ultrametric', 'tau-min-ultrametric', 'tau-harm-ultrametric'], index=networks)
+
+df_lscc = pd.DataFrame(columns=['n-nodes', 'n-edges', 'n-max-edges', 'tau-metric', 'tau-max-metric', 'tau-avg-metric',
+ 'tau-ultrametric', 'tau-max-ultrametric', 'tau-avg-ultrametric'], index=networks)
+
+for network in networks:
+ folder = config[network].get('folder')
+
+ rGstats = f'networks/{folder}/undirected_networks-stats.csv'
+ data = pd.read_csv(rGstats, index_col=0)
+
+ df_wcc['n-min-edges'][network] = data['n-edges']['min']
+ df_lscc['n-max-edges'][network] = data['n-edges']['max']
+
+ for btype in ['metric', 'ultrametric']:
+ row_name = f'tau-edges-{btype}'
+ # WCC Nets
+ for net_type in ['min', 'harm']:
+ col_name = f'tau-{net_type}-{btype}'
+ df_wcc[col_name][network] = data[row_name][net_type]
+ # LSCC Nets
+ for net_type in ['max', 'avg']:
+ col_name = f'tau-{net_type}-{btype}'
+ df_lscc[col_name][network] = data[row_name][net_type]
+
+
+ rGstats = f'networks/{folder}/network-stats.csv'
+ data = pd.read_csv(rGstats, index_col=0)
+
+ correct = ['n-', 'n-', '', '']
+ for idx, meas in enumerate(['nodes', 'edges', 'tau-metric', 'tau-ultrametric']):
+ wcc_meas = correct[idx]+meas
+ df_wcc[wcc_meas][network] = data[network][wcc_meas]
+ lscc_meas = 'LSCC-'+meas
+ df_lscc[wcc_meas][network] = data[network][lscc_meas]
+
+#print(df_wcc)
+#print(df_lscc)
+df_wcc.to_csv('Summary/BackboneCompareStats_WCC.csv')
+df_lscc.drop(index='host-pathogen',axis=0,inplace=True) # Remove SSI which does not have an LSCC
+df_lscc.to_csv('Summary/BackboneCompareStats_LSCC.csv')
+'''
+"\ndf_wcc = pd.DataFrame(columns=['n-nodes', 'n-edges', 'n-min-edges', 'tau-metric', 'tau-min-metric', 'tau-harm-metric',\n 'tau-ultrametric', 'tau-min-ultrametric', 'tau-harm-ultrametric'], index=networks)\n\ndf_lscc = pd.DataFrame(columns=['n-nodes', 'n-edges', 'n-max-edges', 'tau-metric', 'tau-max-metric', 'tau-avg-metric',\n 'tau-ultrametric', 'tau-max-ultrametric', 'tau-avg-ultrametric'], index=networks)\n\nfor network in networks:\n folder = config[network].get('folder')\n \n rGstats = f'networks/{folder}/undirected_networks-stats.csv'\n data = pd.read_csv(rGstats, index_col=0)\n \n df_wcc['n-min-edges'][network] = data['n-edges']['min']\n df_lscc['n-max-edges'][network] = data['n-edges']['max']\n \n for btype in ['metric', 'ultrametric']:\n row_name = f'tau-edges-{btype}'\n # WCC Nets\n for net_type in ['min', 'harm']:\n col_name = f'tau-{net_type}-{btype}'\n df_wcc[col_name][network] = data[row_name][net_type]\n # LSCC Nets\n for net_type in ['max', 'avg']:\n col_name = f'tau-{net_type}-{btype}'\n df_lscc[col_name][network] = data[row_name][net_type]\n \n \n rGstats = f'networks/{folder}/network-stats.csv'\n data = pd.read_csv(rGstats, index_col=0)\n \n correct = ['n-', 'n-', '', '']\n for idx, meas in enumerate(['nodes', 'edges', 'tau-metric', 'tau-ultrametric']):\n wcc_meas = correct[idx]+meas\n df_wcc[wcc_meas][network] = data[network][wcc_meas]\n lscc_meas = 'LSCC-'+meas\n df_lscc[wcc_meas][network] = data[network][lscc_meas]\n \n#print(df_wcc)\n#print(df_lscc)\ndf_wcc.to_csv('Summary/BackboneCompareStats_WCC.csv')\ndf_lscc.drop(index='host-pathogen',axis=0,inplace=True) # Remove SSI which does not have an LSCC\ndf_lscc.to_csv('Summary/BackboneCompareStats_LSCC.csv')\n"
+df = pd.read_csv('Summary/BackboneCompareStats_WCC.csv', index_col=0)
+# Two panels (metric / ultrametric): directed backbone size on x, undirected representations on y.
+fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
+
+# Metric backbone: directed tau (x) against the 'min' and 'harm' undirected tau columns (y)
+ax[0].scatter(df['tau-metric'], df['tau-min-metric'], marker='v', c='r', label='Minimum')
+ax[0].scatter(df['tau-metric'], df['tau-harm-metric'], marker='o', c='k', label='Harmonic Average', alpha=0.5)
+# Dashed vertical segment per row, joining its two undirected values
+ax[0].vlines(df['tau-metric'], ymin=df[['tau-min-metric', 'tau-harm-metric']].min(axis=1), ymax=df[['tau-min-metric', 'tau-harm-metric']].max(axis=1), linestyle='--', color='k')
+ax[0].plot([0, 1], [0, 1], 'k-')  # identity line y = x; points below it have a smaller undirected value
+
+ax[0].legend()
+ax[0].set_xlabel('WCC Directed Network')
+ax[0].set_ylabel('Undirected Representation')
+ax[0].set_title('Metric Backbone Size')
+ax[0].set_aspect('equal')
+
+# Ultrametric backbone: same layout as the left panel, using the ultrametric columns
+ax[1].scatter(df['tau-ultrametric'], df['tau-min-ultrametric'], marker='v', c='r', label='Minimum')
+ax[1].scatter(df['tau-ultrametric'], df['tau-harm-ultrametric'], marker='o', c='k', label='Harmonic Average', alpha=0.5)
+
+ax[1].vlines(df['tau-ultrametric'], ymin=df[['tau-min-ultrametric', 'tau-harm-ultrametric']].min(axis=1), ymax=df[['tau-min-ultrametric', 'tau-harm-ultrametric']].max(axis=1), linestyle='--', color='k')
+ax[1].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+
+ax[1].legend()
+ax[1].set_xlabel('WCC Directed Network')
+# No y-label on the right panel: the y-axis is shared with the left one (sharey=True)
+ax[1].set_title('Ultrametric Backbone Size')
+ax[1].set_aspect('equal')
+
+fig.suptitle('Weakly Connected Component')
+
+plt.tight_layout()
+plt.show()
+The figure suggests that the Minimum and Harmonic Average representations have smaller backbones than the directed network. How many networks agree with this?
 +n_harm = sum(df['tau-harm-metric'] <= df['tau-metric'])  # rows where the harmonic-average value does not exceed the directed one
+n_min = sum(df['tau-min-metric'] <= df['tau-metric'])  # same check for the minimum representation
+# Printed order: n_min, n_harm, total number of rows
+print(n_min, n_harm, df.shape[0])
+20 19 20 ++
n_harm = sum(df['tau-harm-ultrametric'] <= df['tau-ultrametric'])  # ultrametric version of the check above
+n_min = sum(df['tau-min-ultrametric'] <= df['tau-ultrametric'])
+# Printed order: n_min, n_harm, total number of rows
+print(n_min, n_harm, df.shape[0])
+20 20 20 ++
df = pd.read_csv('Summary/BackboneCompareStats_LSCC.csv', index_col=0)
+# Two panels (metric / ultrametric): directed LSCC backbone size on x, undirected representations on y.
+fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
+
+# Metric backbone: directed tau (x) against the 'max' and 'avg' undirected tau columns (y)
+ax[0].scatter(df['tau-metric'], df['tau-max-metric'], marker='^', c='b', label='Maximum')
+ax[0].scatter(df['tau-metric'], df['tau-avg-metric'], marker='s', c='g', label='Average', alpha=0.5)
+# Dashed vertical segment per row, joining its two undirected values
+ax[0].vlines(df['tau-metric'], ymin=df[['tau-max-metric', 'tau-avg-metric']].min(axis=1), ymax=df[['tau-max-metric', 'tau-avg-metric']].max(axis=1), linestyle='--', color='k')
+ax[0].plot([0, 1], [0, 1], 'k-')  # identity line y = x; points above it have a larger undirected value
+
+ax[0].legend()
+ax[0].set_xlabel('LSCC Directed Network')
+ax[0].set_ylabel('Undirected Representation')
+ax[0].set_title('Metric Backbone Size')
+ax[0].set_aspect('equal')
+
+# Ultrametric backbone: same layout as the left panel, using the ultrametric columns
+ax[1].scatter(df['tau-ultrametric'], df['tau-max-ultrametric'], marker='^', c='b', label='Maximum')
+ax[1].scatter(df['tau-ultrametric'], df['tau-avg-ultrametric'], marker='s', c='g', label='Average', alpha=0.5)
+
+ax[1].vlines(df['tau-ultrametric'], ymin=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].min(axis=1), ymax=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].max(axis=1), linestyle='--', color='k')
+ax[1].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+
+ax[1].legend()
+ax[1].set_xlabel('LSCC Directed Network')
+# No y-label on the right panel: the y-axis is shared with the left one (sharey=True)
+ax[1].set_title('Ultrametric Backbone Size')
+ax[1].set_aspect('equal')
+
+fig.suptitle('Largest Strongly Connected Component')
+
+plt.tight_layout()
+plt.show()
+The figure suggests that the Maximum and Average representations have larger backbones than the directed network. How many networks agree with this?
 +n_avg = sum(df['tau-avg-metric'] > df['tau-metric'])  # rows where the average representation is strictly larger than the directed value
+n_max = sum(df['tau-max-metric'] > df['tau-metric'])  # same check for the maximum representation
+# Printed order: n_avg, n_max, total number of rows (metric backbone)
+print(n_avg, n_max, df.shape[0])
+# Repeat the strict comparison for the ultrametric backbone
+n_avg = sum(df['tau-avg-ultrametric'] > df['tau-ultrametric'])
+n_max = sum(df['tau-max-ultrametric'] > df['tau-ultrametric'])
+
+print(n_avg, n_max, df.shape[0])
+14 13 19 +11 11 19 ++
Both have the same number of nodes, but do they have the same number of edges even after considering directionality? +In other words, is the number of edges in the undirected graph equal to the number of edges in the LSCC?
 +fig, ax = plt.subplots(1, 1, figsize=(6, 6))
+# df is the LSCC table read above; each undirected edge is counted twice to compare with directed edges
+ax.loglog(df['n-edges'], 2*df['n-max-edges'], 'ro', alpha=0.3)
+ax.loglog([10, 1e6], [10, 1e6], 'k--')  # dashed identity line y = x
+ax.set_aspect('equal')
+
+ax.set_xlabel('Number of Edges in the LSCC')
+ax.set_ylabel('Twice Number of Edges in the Undirected Graph')
+
+plt.tight_layout()
+plt.show()
+In order for an edge to be present in this undirected representation, a finite-distance edge has to be present in both directions. +This is only possible for a subset of the edges of the LSCC; thus we would expect the number of edges in the LSCC to be an upper bound on twice the number of edges in this undirected representation. In other words, the points should fall on or below the identity line.
 +n = sum(df['n-edges'] >= 2*df['n-max-edges'])  # rows on or below the identity line of the figure above
+print(n, df.shape[0])  # count satisfying the bound, then total number of rows
+19 19 ++
'''
+from collections import Counter
+
+for network in networks:
+ folder = config[network].get('folder')
+
+ rGfile = f'networks/{folder}/network.graphml'
+ G = nx.read_graphml(rGfile)
+ LSCC = G.subgraph(max(nx.strongly_connected_components(G), key=len))
+
+ rGnets = f'networks/{folder}/undirected_networks.pickle'
+ U = pk.load(open(rGnets, 'rb'))
+
+ cc_sizes = []
+ for c in nx.connected_components(U['max']):
+ in_lscc = sum([int(LSCC.has_node(n)) for n in c])
+ if in_lscc == len(c):
+ cc_sizes.append(len(c))
+
+ print(network, Counter(cc_sizes))
+'''
+"\nfrom collections import Counter\n\nfor network in networks:\n folder = config[network].get('folder')\n \n rGfile = f'networks/{folder}/network.graphml'\n G = nx.read_graphml(rGfile)\n LSCC = G.subgraph(max(nx.strongly_connected_components(G), key=len))\n \n rGnets = f'networks/{folder}/undirected_networks.pickle'\n U = pk.load(open(rGnets, 'rb'))\n \n cc_sizes = []\n for c in nx.connected_components(U['max']):\n in_lscc = sum([int(LSCC.has_node(n)) for n in c])\n if in_lscc == len(c):\n cc_sizes.append(len(c))\n \n print(network, Counter(cc_sizes))\n"
+Within the LSCC, some node pairs are connected in both directions and their edges are preserved in the undirected representation, while others lie on a directed cycle but lack this reciprocal relationship (e.g., A->B->C->A). We refer to those non-reciprocated edges as breaking points. If the undirected representation has $E_u$ edges and the LSCC has $E_s$ edges, then the number of breaking points is +\begin{equation} +n_{bp} = E_s - 2E_u +\end{equation}
+ +df = pd.read_csv('Summary/BackboneCompareStats_LSCC.csv', index_col=0)
+
+df['n-break'] = df['n-edges'] - 2*df['n-max-edges']
+
+df['rel-diff'] = (df['tau-max-metric']/df['tau-metric'] - 1)
+
+fig, ax = plt.subplots(1, 1, figsize=(6, 6))
+
+ax.semilogx(df['n-break'], df['rel-diff'], 'ro', alpha=0.3)
+ax.axhline(0.0, linestyle='--', color='k')
+#ax.loglog([10, 1e6], [10, 1e6], 'k--')
+#ax.set_aspect('equal')
+
+ax.set_xlabel('Number of Breaking points')
+ax.set_ylabel('Relative difference in backbone size')
+
+plt.tight_layout()
+plt.show()
+'''
+df = pd.DataFrame(index=['n-nodes', 'nd-edges', 'nu-edges', 'tau-metric', 'tau-ultrametric',
+ 'tau-avg-metric', 'tau-avg-ultrametric', 'tau-max-metric', 'tau-max-ultrametric'])
+
+for network in networks:
+ folder = config[network].get('folder')
+ data = pd.read_csv(f'networks/{folder:s}/undirected_networks-stats.csv', index_col=0).T
+
+ for id in data.columns:
+ if data[id].min() < 1.0:
+ df[f'{network}-{id}'] = data[id]
+
+df.T.to_csv('Summary/BackboneCompareStats_Components.csv')
+'''
+"\ndf = pd.DataFrame(index=['n-nodes', 'nd-edges', 'nu-edges', 'tau-metric', 'tau-ultrametric', \n 'tau-avg-metric', 'tau-avg-ultrametric', 'tau-max-metric', 'tau-max-ultrametric'])\n\nfor network in networks:\n folder = config[network].get('folder')\n data = pd.read_csv(f'networks/{folder:s}/undirected_networks-stats.csv', index_col=0).T\n \n for id in data.columns:\n if data[id].min() < 1.0:\n df[f'{network}-{id}'] = data[id]\n \ndf.T.to_csv('Summary/BackboneCompareStats_Components.csv')\n"
+df = pd.read_csv('Summary/BackboneCompareStats_Components.csv', index_col=0)
+# NOTE(review): presumably one row per strongly connected component ('<network>-<id>') - confirm against the CSV
+fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
+
+# Metric backbone: only the 'avg' representation is drawn; the 'max' scatter is disabled below
+#ax[0].scatter(df['tau-metric'], df['tau-max-metric'], marker='^', c='b', label='Maximum')
+ax[0].scatter(df['tau-metric'], df['tau-avg-metric'], marker='s', c='g', label='Average', s=50*np.log10(df['n-nodes']), alpha=0.5)
+# Marker area above scales with log10 of the row's node count
+#ax[0].vlines(df['tau-metric'], ymin=df[['tau-max-metric', 'tau-avg-metric']].min(axis=1), ymax=df[['tau-max-metric', 'tau-avg-metric']].max(axis=1), linestyle='--', color='k')
+ax[0].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+
+ax[0].legend()
+ax[0].set_xlabel('SCCs Directed Network')
+ax[0].set_ylabel('Undirected Representation')
+ax[0].set_title('Metric Backbone Size')
+ax[0].set_aspect('equal')
+
+# Ultrametric backbone: only the 'max' representation is drawn; the 'avg' scatter is disabled below
+ax[1].scatter(df['tau-ultrametric'], df['tau-max-ultrametric'], marker='^', c='b', label='Maximum', s=50*np.log10(df['n-nodes']), alpha=0.5)
+#ax[1].scatter(df['tau-ultrametric'], df['tau-avg-ultrametric'], marker='s', c='g', label='Average', alpha=0.5)
+
+#ax[1].vlines(df['tau-ultrametric'], ymin=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].min(axis=1), ymax=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].max(axis=1), linestyle='--', color='k')
+ax[1].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+
+ax[1].legend()
+ax[1].set_xlabel('SCCs Directed Network')
+# No y-label on the right panel: the y-axis is shared with the left one (sharey=True)
+ax[1].set_title('Ultrametric Backbone Size')
+ax[1].set_aspect('equal')
+
+fig.suptitle('Strongly Connected Components')
+
+plt.tight_layout()
+plt.show()
+df = pd.read_csv('Summary/BackboneCompareStats_LargestComponent.csv', index_col=0)
+# Same layout as the per-component figure, restricted to the rows of the largest-component table
+fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
+
+# Metric backbone: only the 'avg' representation is drawn; the 'max' scatter is disabled below
+#ax[0].scatter(df['tau-metric'], df['tau-max-metric'], marker='^', c='b', label='Maximum')
+ax[0].scatter(df['tau-metric'], df['tau-avg-metric'], marker='s', c='g', label='Average', s=50*np.log10(df['n-nodes']), alpha=0.5)
+# Marker area above scales with log10 of the row's node count
+#ax[0].vlines(df['tau-metric'], ymin=df[['tau-max-metric', 'tau-avg-metric']].min(axis=1), ymax=df[['tau-max-metric', 'tau-avg-metric']].max(axis=1), linestyle='--', color='k')
+ax[0].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+
+ax[0].legend()
+ax[0].set_xlabel('LSCC Directed Network')
+ax[0].set_ylabel('Undirected Representation')
+ax[0].set_title('Metric Backbone Size')
+ax[0].set_aspect('equal')
+
+# Ultrametric backbone: only the 'max' representation is drawn; the 'avg' scatter is disabled below
+ax[1].scatter(df['tau-ultrametric'], df['tau-max-ultrametric'], marker='^', c='b', label='Maximum', s=50*np.log10(df['n-nodes']), alpha=0.5)
+#ax[1].scatter(df['tau-ultrametric'], df['tau-avg-ultrametric'], marker='s', c='g', label='Average', alpha=0.5)
+
+#ax[1].vlines(df['tau-ultrametric'], ymin=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].min(axis=1), ymax=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].max(axis=1), linestyle='--', color='k')
+ax[1].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+
+ax[1].legend()
+ax[1].set_xlabel('LSCC Directed Network')
+# No y-label on the right panel: the y-axis is shared with the left one (sharey=True)
+ax[1].set_title('Ultrametric Backbone Size')
+ax[1].set_aspect('equal')
+
+fig.suptitle('Largest Strongly Connected Component')
+
+plt.tight_layout()
+plt.show()
+df = pd.read_csv('Summary/BackboneCompareStats_LargestComponent.csv', index_col=0)
+# Zoomed version of the largest-component figure: both axes limited to [0, 0.1]; no labels or legend are set
+fig, ax = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
+
+# Metric backbone: only the 'avg' representation is drawn; the 'max' scatter is disabled below
+#ax[0].scatter(df['tau-metric'], df['tau-max-metric'], marker='^', c='b', label='Maximum')
+ax[0].scatter(df['tau-metric'], df['tau-avg-metric'], marker='s', c='g', label='Average', s=50*np.log10(df['n-nodes']), alpha=0.5)
+
+#ax[0].vlines(df['tau-metric'], ymin=df[['tau-max-metric', 'tau-avg-metric']].min(axis=1), ymax=df[['tau-max-metric', 'tau-avg-metric']].max(axis=1), linestyle='--', color='k')
+ax[0].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+# Restrict the view to the rows with small tau values
+ax[0].set_xlim((0, 0.1))
+ax[0].set_ylim((0, 0.1))
+
+# Ultrametric backbone: only the 'max' representation is drawn; the 'avg' scatter is disabled below
+ax[1].scatter(df['tau-ultrametric'], df['tau-max-ultrametric'], marker='^', c='b', label='Maximum', s=50*np.log10(df['n-nodes']), alpha=0.5)
+#ax[1].scatter(df['tau-ultrametric'], df['tau-avg-ultrametric'], marker='s', c='g', label='Average', alpha=0.5)
+
+#ax[1].vlines(df['tau-ultrametric'], ymin=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].min(axis=1), ymax=df[['tau-max-ultrametric', 'tau-avg-ultrametric']].max(axis=1), linestyle='--', color='k')
+ax[1].plot([0, 1], [0, 1], 'k-')  # identity line y = x
+# Same zoom window as the left panel
+ax[1].set_xlim((0, 0.1))
+ax[1].set_ylim((0, 0.1))
+
+plt.tight_layout()
+plt.show()
+# Load the LSCC-vs-WCC size comparison table; the first CSV column becomes the index.
+df = pd.read_csv('Summary/CompareSize_LSCC_WCC.csv', index_col=0)
+# Render the table as rich output (presumably IPython's `display` -- confirm it is in scope).
+display(df)
+| Network | N_WCC | N_LSCC | Percent |
+|---|---|---|---|
+| bike-sharing | 725 | 723 | 0.997241 |
+| business-faculty | 113 | 84 | 0.743363 |
+| caviar-proj | 110 | 63 | 0.572727 |
+| celegans-her | 313 | 215 | 0.686901 |
+| celegans-male | 328 | 238 | 0.725610 |
+| colombia-calls | 863 | 863 | 1.000000 |
+| colombia-mobility | 863 | 863 | 1.000000 |
+| comorbidity | 95 | 95 | 1.000000 |
+| cs-faculty | 206 | 78 | 0.378641 |
+| DDI | 412 | 412 | 1.000000 |
+| giraffe | 6 | 6 | 1.000000 |
+| history-faculty | 145 | 56 | 0.386207 |
+| mobility-manizales | 57 | 57 | 1.000000 |
+| mobility-medellin | 413 | 413 | 1.000000 |
+| phone-calls | 322 | 30 | 0.093168 |
+| tennis-loss | 4245 | 1263 | 0.297527 |
+| us-airports | 1075 | 926 | 0.861395 |
+| us-weblinks | 42800 | 18112 | 0.423178 |
+| yeast-grn | 6216 | 1225 | 0.197072 |
+