This repository was archived by the owner on Aug 24, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathanalyzeEdgeWeights.py
More file actions
executable file
·116 lines (93 loc) · 3.13 KB
/
analyzeEdgeWeights.py
File metadata and controls
executable file
·116 lines (93 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
'''
CREATED:2012-03-28 08:26:04 by Brian McFee <bmcfee@cs.ucsd.edu>
Analyze the edge weights in a hypergraph model
Usage:
./analyzeEdgeWeights.py results_IN.pickle CATEGORIES weight_distribution_out.csv
'''
import sys
import re
import cPickle as pickle
import collections
import pprint
def buildREs():
    '''
    Build the compiled regular expressions used to classify edge labels.

    A label is either simple (one feature) or compound (two features joined
    by "-&-").  Simple labels look like:
        __UNIFORM                   => ' Uniform'
        AUDIO-XXXX.XXXX             => 'Audio'
        CF-XXXX.XXXX                => 'CF'
        (YEAR|DECADE)_XXXX          => 'Era'
        FAMILIARITY_(LOW|MED|HIGH)  => 'Familiarity'
        LYRICS-XX.XX                => 'Lyrics'
        anything else               => 'Tags'

    For every typed feature X the result contains:
        'X'      : X alone, or X-&-X
        'X-Y'    : X-&-Y or Y-&-X, for each typed feature Y sorting after X
        'X-Tags' : X joined (on either side) with anything at all

    Returns:
        dict mapping category name -> compiled pattern.

    The patterns overlap on purpose ('X-Tags' also accepts X-&-Y, and 'Tags'
    accepts everything), so consumers must try the keys in sorted order and
    stop at the first hit.  ' Uniform' carries a leading space so that it
    sorts ahead of every other key.
    '''
    # Un-anchored base pattern for each typed feature.
    # NOTE(review): the pattern requires the prefix FAMILIARITY_; confirm this
    # against the edge labels actually stored in the model.
    base = {
        'Audio': r'AUDIO-\d{4}\.\d{4}',
        'CF': r'CF-\d{4}\.\d{4}',
        'Era': r'(YEAR|DECADE)_\d{4}',
        'Familiarity': r'FAMILIARITY_(LOW|MED|HIGH)',
        'Lyrics': r'LYRICS-\d{2}\.\d{2}',
    }
    names = sorted(base)

    patterns = {' Uniform': '__UNIFORM'}
    for i, first in enumerate(names):
        p1 = base[first]
        # Feature paired with anything (ends up meaning "feature + tag",
        # because the more specific pair keys sort earlier).
        patterns['%s-Tags' % first] = '^((.*?-&-%s)|(%s-&-.*))$' % (p1, p1)
        # Feature paired with each later feature, in either order.
        for second in names[i + 1:]:
            p2 = base[second]
            patterns[first + '-' + second] = (
                '^((%s-&-%s)|(%s-&-%s))$' % (p1, p2, p2, p1))
        # Feature alone, or paired with itself.
        patterns[first] = '^(%s|(%s-&-%s))$' % (p1, p1, p1)

    # Catch-all: whatever is left is treated as a tag edge.
    patterns['Tags'] = '.*'

    return dict((name, re.compile(p)) for name, p in patterns.items())
def parseWeights(R, weights):
    '''
    Sum edge weights per edge category.

    Arguments:
        R       -- dict mapping category name -> compiled regular expression
        weights -- dict mapping edge label -> numeric weight

    Returns:
        collections.defaultdict mapping category name -> total weight of the
        edges assigned to it.  Categories that received no edge read as 0.0.

    Each edge is assigned to the first category, in sorted key order, whose
    pattern matches the start of the label (re.match).  An edge that matches
    no category is reported on stdout and contributes to no total.
    '''
    ordered = sorted(R)

    totals = collections.defaultdict(float)
    for edge, w in weights.items():
        for k in ordered:
            if R[k].match(edge):
                totals[k] += w
                break
        else:
            # Loop finished without a break: nothing matched this edge.
            print('ERROR: could not match edge:  %s' % edge)

    return totals
def crunchWeights(R, model_in, categories_in, csv_out):
    '''
    Write a CSV of per-category edge-weight totals for a pickled model.

    Arguments:
        R             -- dict mapping edge category -> compiled pattern
        model_in      -- path to a pickle holding a dict with a 'weights' entry
        categories_in -- path to a text file with one category name per line
        csv_out       -- path of the CSV file to create

    Output layout: a header row "CATEGORY\\EDGE,<edge categories...>" (edge
    categories in sorted order), then one row per category from
    categories_in holding the summed weight of each edge category, formatted
    with '%f'.

    For each category c, weights[c][0] is taken as the edge -> weight mapping.
    Raises KeyError if a listed category is absent from the model.
    '''
    # NOTE: unpickling can execute arbitrary code; only load trusted models.
    # Pickle data is binary, so the file must be opened in binary mode.
    with open(model_in, 'rb') as f:
        weights = pickle.load(f)['weights']

    # Blank lines (e.g. a trailing newline at end of file) are not categories.
    with open(categories_in, 'r') as f:
        cats = [line.strip() for line in f]
    cats = [c for c in cats if c]

    edges = sorted(R)

    with open(csv_out, 'w') as f:
        f.write('CATEGORY\\EDGE,%s\n' % ','.join(edges))
        for c in cats:
            W = parseWeights(R, weights[c][0])
            row = ','.join(['%f' % W[x] for x in edges])
            f.write('%s,%s\n' % (c, row))
if __name__ == '__main__':
    # Three positional arguments are required: the model pickle, the
    # category list, and the output CSV path.  Fail with the usage line
    # instead of an IndexError when any of them is missing.
    if len(sys.argv) < 4:
        sys.exit('Usage: %s results_IN.pickle CATEGORIES '
                 'weight_distribution_out.csv' % sys.argv[0])

    crunchWeights(buildREs(), sys.argv[1], sys.argv[2], sys.argv[3])