-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgraph.py
More file actions
217 lines (158 loc) · 6.22 KB
/
graph.py
File metadata and controls
217 lines (158 loc) · 6.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
from slide import Slide
from score import transition_score
class Graph(object):
def __init__(self):
self.nodes = {}
self.max_recursion = 4
def set_max_recursion(self, max_recursion):
self.max_recursion = max_recursion
def get_max_recursion(self):
return self.max_recursion
def add_node(self, node):
self.nodes[node.uid] = node
def add_link(self, uid_1, uid_2):
node_1 = self.nodes[uid_1]
node_2 = self.nodes[uid_2]
node_1.add_neighbour(node_2)
def del_link(self, uid_1, uid_2):
node_1 = self.nodes[uid_1]
node_2 = self.nodes[uid_2]
node_1.del_neighbour(node_2)
def remove_node(self, uid):
neighbours_uid = []
for neighbour_uid, neighbour in self.nodes[uid].neighbours.items():
# Do not edit inplace
neighbours_uid.append(neighbour_uid)
for neighbour_uid in neighbours_uid:
self.del_link(uid, neighbour_uid)
self.del_link(neighbour_uid, uid)
def get_best_neighbour(self, uid):
node = self.nodes[uid]
best_neighbour = None
best_neighbour_reachable_nodes = 0
for neighbour_uid, neighbour in node.neighbours.items():
if best_neighbour is None:
best_neighbour = neighbour
best_neighbour_reachable_nodes = self.get_reachable_node(best_neighbour.node.uid)
elif len(neighbour.node.neighbours) == 2:
# If a neighbour only has another neighbour, do not leave him alone!
best_neighbour = neighbour
break
elif len(self.get_reachable_node(neighbour.node.uid)) > len(
best_neighbour_reachable_nodes):
best_neighbour = neighbour
best_neighbour_reachable_nodes = self.get_reachable_node(best_neighbour.node.uid)
if best_neighbour is not None:
best_neighbour = best_neighbour.node.uid
return best_neighbour
def get_reachable_node(self, uid, max_recursion=None, ignored_nodes=None):
reachable_nodes = set()
if max_recursion is None:
max_recursion = self.get_max_recursion()
if ignored_nodes is None:
ignored_nodes = set()
starting_node = self.nodes[uid]
reachable_nodes.add(starting_node)
ignored_nodes.add(starting_node)
if max_recursion <= 0:
return reachable_nodes
for neighbour_uid, neighbour in starting_node.neighbours.items():
if neighbour_uid in ignored_nodes:
continue
else:
reachable_nodes |= self.get_reachable_node(neighbour_uid,
max_recursion=(max_recursion - 1),
ignored_nodes=ignored_nodes)
return reachable_nodes
def break_links(self, filter_fn):
links_to_del = []
for node_uid, node in self.nodes.items():
for neighbour_uid, neighbour in node.neighbours.items():
if not filter_fn(node, neighbour):
links_to_del.append((node_uid, neighbour_uid))
for node_uid, neighbour_uid in links_to_del:
self.del_link(node_uid, neighbour_uid)
def count_links(self):
link_count = 0
for node_uid, node in self.nodes.items():
link_count += len(node.neighbours)
return link_count
def clean_dead_end(self):
dead_end = []
for uid, node in self.nodes.items():
if len(node.neighbours) == 1:
dead_end.append(uid)
for uid in dead_end:
self.remove_node(uid)
class Node(object):
def __init__(self, uid, slide):
self.uid = uid
self._slide = slide
self.neighbours = {}
def get_slide(self):
return self._slide
def add_neighbour(self, node):
if node.uid not in self.neighbours:
self.neighbours[node.uid] = Neighbour(self, node)
def del_neighbour(self, node):
if node.uid in self.neighbours:
del self.neighbours[node.uid]
class Neighbour(object):
def __init__(self, current_node, neighbour_node):
self.node = neighbour_node
self.weight = transition_score(current_node.get_slide(), neighbour_node.get_slide())
def build_graph(pics, pics_per_tag):
graph = Graph()
# Add node to graph
for uid, pic in pics.items():
node = Node(uid, Slide(pic))
graph.add_node(node)
# Link nodes in graph
for tag, tag_pics in pics_per_tag.items():
for pic_id_1 in tag_pics:
pic_1 = pics[pic_id_1]
for pic_id_2 in tag_pics:
pic_2 = pics[pic_id_2]
if pic_id_1 != pic_id_2:
graph.add_link(pic_1.id, pic_2.id)
return graph
def crawl_graph(graph, starting_node_uid, recursion_strategy=None):
if recursion_strategy is None:
recursion_strategy = {
0: 4,
5000: 4,
15000: 4, #3
20000: 4, #4
25000: 4, #4
30000: 4, #5
35000: 4, #6
40000: 4, #7
45000: 4, #8
}
path = []
current_node_uid = starting_node_uid
path.append(current_node_uid)
keep_going = True
i = 0
while keep_going:
if i % 2000 == 0 and i != 0:
print('looking for node %s' % i)
from collections import Counter
occurrences = []
for uid, node in graph.nodes.items():
occurrences.append(len(node.neighbours))
# counter = Counter(occurrences)
# print('%s (path) -> %s' % (len(path), sorted(counter.most_common(), key=lambda x: x[0])))
# update recursion strategy
if i in recursion_strategy:
graph.set_max_recursion(recursion_strategy[i])
next_node_uid = graph.get_best_neighbour(current_node_uid)
if next_node_uid is None:
keep_going = False
else:
# Cannot move backward to selected node
graph.remove_node(current_node_uid)
current_node_uid = next_node_uid
path.append(current_node_uid)
i += 1
return path