-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathfacebook.py
More file actions
executable file
·164 lines (139 loc) · 5.3 KB
/
facebook.py
File metadata and controls
executable file
·164 lines (139 loc) · 5.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python
import networkx as nx
import numpy as np
import glob
import os, os.path
import math
pathhack = os.path.dirname(os.path.realpath(__file__))
feat_file_name = "%s/feature_map.txt" % (pathhack,)
feature_index = {} #numeric index to name
inverted_feature_index = {} #name to numeric index
network = nx.Graph()
ego_nodes = []
def parse_featname_line(line):
line = line[(line.find(' '))+1:] # chop first field
split = line.split(';')
name = ';'.join(split[:-1]) # feature name
index = int(split[-1].split(" ")[-1]) #feature index
return index, name
def load_features():
# may need to build the index first
if not os.path.exists(feat_file_name):
feat_index = {}
# build the index from data/*.featnames files
featname_files = glob.iglob("%s/data/*.featnames" % (pathhack,))
for featname_file_name in featname_files:
featname_file = open(featname_file_name, 'r')
for line in featname_file:
# example line:
# 0 birthday;anonymized feature 376
index, name = parse_featname_line(line)
feat_index[index] = name
featname_file.close()
keys = feat_index.keys()
keys.sort()
out = open(feat_file_name,'w')
for key in keys:
out.write("%d %s\n" % (key, feat_index[key]))
out.close()
# index built, read it in (even if we just built it by scanning)
global feature_index
global inverted_feature_index
index_file = open(feat_file_name,'r')
for line in index_file:
split = line.strip().split(' ')
key = int(split[0])
val = split[1]
feature_index[key] = val
index_file.close()
for key in feature_index.keys():
val = feature_index[key]
inverted_feature_index[val] = key
def load_nodes():
assert len(feature_index) > 0, "call load_features() first"
global network
global ego_nodes
# get all the node ids by looking at the files
ego_nodes = [int(x.split("/")[-1].split('.')[0]) for x in glob.glob("%s/data/*.featnames" % (pathhack,))]
node_ids = ego_nodes
# parse each node
for node_id in node_ids:
featname_file = open("%s/data/%d.featnames" % (pathhack,node_id), 'r')
feat_file = open("%s/data/%d.feat" % (pathhack,node_id), 'r')
egofeat_file = open("%s/data/%d.egofeat" % (pathhack,node_id), 'r')
edge_file = open("%s/data/%d.edges" % (pathhack,node_id), 'r')
# parse ego node
network.add_node(node_id)
# 0 1 0 0 0 ...
ego_features = [int(x) for x in egofeat_file.readline().split(' ')]
i = 0
network.node[node_id]['features'] = np.zeros(len(feature_index))
for line in featname_file:
key, val = parse_featname_line(line)
network.node[node_id]['features'][key] = ego_features[i] + 1
i += 1
# parse neighboring nodes
for line in feat_file:
featname_file.seek(0)
split = [int(x) for x in line.split(' ')]
node_id = split[0]
features = split[1:]
network.add_node(node_id)
network.node[node_id]['features'] = np.zeros(len(feature_index))
i = 0
for line in featname_file:
key, val = parse_featname_line(line)
network.node[node_id]['features'][key] = features[i]
i += 1
featname_file.close()
feat_file.close()
egofeat_file.close()
edge_file.close()
def load_edges():
global network
assert network.order() > 0, "call load_nodes() first"
edge_file = open("%s/facebook_combined.txt" % (pathhack,),"r")
for line in edge_file:
# nodefrom nodeto
split = [int(x) for x in line.split(" ")]
node_from = split[0]
node_to = split[1]
network.add_edge(node_from, node_to)
def load_network():
"""
Load the network. After calling this function, facebook.network points to a networkx object for the facebook data.
"""
load_features()
load_nodes()
load_edges()
def feature_matrix():
n_nodes = network.number_of_nodes()
n_features = len(feature_index)
X = np.zeros((n_nodes, n_features))
for i,node in enumerate(network.nodes()):
X[i,:] = network.node[node]['features']
return X
def universal_feature(feature_index):
"""
Does every node have this feature?
"""
return len([x for x in network.nodes_iter() if network.node[x]['feautures'][feature_index] > 0]) // network.order() == 1
if __name__ == '__main__':
print "Running tests."
print "Loading network..."
load_network()
print "done."
failures = 0
def test(actual, expected, test_name):
global failures #lol python scope
try:
print "testing %s..." % (test_name,)
assert actual == expected, "%s failed (%s != %s)!" % (test_name,actual, expected)
print "%s passed (%s == %s)." % (test_name,actual,expected)
except AssertionError as e:
print e
failures += 1
test(network.order(), 4039, "order")
test(network.size(), 88234, "size")
test(round(nx.average_clustering(network),4), 0.6055, "clustering")
print "%d tests failed." % (failures,)