# multisoft-normed.py
import numpy as np
import numpy.random as nr
from math import exp, log


# Brute-force reference: sum over all subsets of the given size of the
# product of exp of the elements. If `fixed` is a valid index, only the
# subsets containing that element are counted.
def sumprod_brute(elts, size, fixed=-1):
    if size > len(elts):
        return 0
    if 0 <= fixed < len(elts):
        if size == 0:
            return 0
        # Split each subset into the part left of `fixed` and the part
        # right of it, then multiply in exp(elts[fixed]) itself.
        z = 0
        for s in range(size):
            z += sumprod_brute(elts[:fixed], s) * sumprod_brute(elts[fixed+1:], size - 1 - s)
        return z * exp(elts[fixed])
    if size == 0:
        return 1
    # Either include the first element or skip it.
    return exp(elts[0]) * sumprod_brute(elts[1:], size - 1) + sumprod_brute(elts[1:], size)
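
# Worked example for intuition: with elts = [a, b, c] and size = 2,
#   sumprod_brute(elts, 2)          == exp(a+b) + exp(a+c) + exp(b+c)
#   sumprod_brute(elts, 2, fixed=0) == exp(a+b) + exp(a+c)
# since fixing index 0 keeps only the subsets that contain a.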


# Dynamic-programming version with per-column renormalization to avoid
# overflow/underflow. Returns, for each index j, the marginal probability
# y_j that element j is in the subset, when subsets of the given size are
# weighted by the product of exp of their elements (optionally conditioned
# on `fixed` being in the subset).
def sumprod(elts, size, fixed=-1):
    N = len(elts)
    B = np.zeros((size + 1, N + 1))  # Backward lattice
    B[0, N] = 1
    logBNorms = np.zeros(N + 1)
    # Backward pass
    for i in range(N - 1, -1, -1):
        B[0, i] = (i >= size and i > fixed) * B[0, i + 1]  # This can get quite small
        for s in range(max(1, size - i), size + 1):
            B[s, i] = B[s - 1, i + 1] * exp(elts[i]) + B[s, i + 1] * (fixed != i)
        norm = B[:, i].sum()
        B[:, i] /= norm
        logBNorms[i] = log(norm) + logBNorms[i + 1]
    F = np.zeros((size + 1,))  # Forward column
    F[0] = 1
    # Forward pass
    # Compute y_j for each j (marginal prob)
    y = np.zeros(N)
    logFNorm = -logBNorms[0]  # Subtract log partition function
    for i in range(1, N + 1):
        for s in range(size, -1, -1):
            if s < size:
                y[i - 1] += F[s] * B[size - 1 - s, i]
            if s > 0:
                F[s] = F[s - 1] * exp(elts[i - 1]) + F[s] * (fixed != i - 1)
            elif fixed == i - 1:
                F[0] = 0
        norm = F.sum()
        F /= norm
        y[i - 1] *= exp(elts[i - 1] + logBNorms[i] + logFNorm)
        logFNorm += log(norm)
    return y
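

# A quick consistency check one might run (a sketch; `_check_sumprod` is a
# hypothetical helper, not part of the original file): for small N the DP
# marginals should match the brute-force ratios z_j / Z.
def _check_sumprod(elts, size):
    N = len(elts)
    Z = sumprod_brute(elts, size)
    bf = np.zeros(N)
    for j in range(N):
        # Subsets containing j: pick s elements left of j, size-1-s right of it.
        for s in range(size):
            bf[j] += sumprod_brute(elts[:j], s) * sumprod_brute(elts[j+1:], size - 1 - s)
        bf[j] *= exp(elts[j])
    assert np.allclose(sumprod(elts, size), bf / Z)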


# Computes log(exp(x) + exp(y))
def logadd(x, y):
    if x == -np.inf and y == -np.inf:
        return -np.inf
    M = max(x, y)
    m = min(x, y)
    diff = M - m
    return M if diff >= 15 else M + log(1 + exp(-diff))
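
# Example: logadd(log(2.0), log(3.0)) == log(5.0) up to rounding. The
# diff >= 15 cutoff skips a correction of at most log(1 + exp(-15)) ~ 3e-7,
# trading a negligible amount of precision for speed.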


# Log-space version of sumprod: same marginals, but all lattice values are
# kept as logs, so no explicit renormalization is needed. Returns, for each
# index j, the marginal probability that element j is in the subset
# (optionally conditioned on `fixed` being in the subset).
def sumprod_logspace(elts, size, fixed=-1):
    N = len(elts)
    logB = -np.inf * np.ones((size + 1, N + 1))  # Backward lattice
    logB[0, :] = 0
    if fixed >= 0:
        logB[0, :fixed + 1] = -np.inf
    # Backward pass
    for i in range(N - 1, -1, -1):
        for s in range(max(1, size - i), size + 1):
            logB[s, i] = logadd(logB[s - 1, i + 1] + elts[i],
                                logB[s, i + 1] if fixed != i else -np.inf)
    logF = -np.inf * np.ones((size + 1,))  # Forward column
    logF[0] = 0
    # Forward pass
    # Compute y_j for each j (marginal prob)
    logy = -np.inf * np.ones(N)
    logFNorm = -logB[size, 0]  # Subtract log partition function
    for i in range(1, N + 1):
        for s in range(size, -1, -1):
            if s < size:
                logy[i - 1] = logadd(logy[i - 1], logF[s] + logB[size - 1 - s, i])
            if s > 0:
                logF[s] = logadd(logF[s - 1] + elts[i - 1],
                                 logF[s] if fixed != i - 1 else -np.inf)
            elif fixed == i - 1:
                logF[0] = -np.inf
        logy[i - 1] += elts[i - 1] + logFNorm
    return np.exp(logy)
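

# Agreement check between the two DP variants (a sketch; `_check_logspace`
# is a hypothetical helper, not part of the original file): on well-scaled
# inputs both should produce the same marginals.
def _check_logspace(elts, size, fixed=-1):
    assert np.allclose(sumprod(elts, size, fixed),
                       sumprod_logspace(elts, size, fixed))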


# Checks the analytic gradient of the objective
#   E = log(y_i)
# where y_i = z_i / Z and i is the index of the correct label,
# against a finite-difference estimate.
def check_grad(elts, size, correct=0):
    eps = 0.01
    N = len(elts)
    y = sumprod_logspace(elts, size)
    Cy = sumprod_logspace(elts, size, fixed=correct)
    grad = Cy - y
    print("Analytic gradient: ")
    print(grad)
    grad_num = np.zeros_like(grad)
    for i in range(N):
        tmp = elts[i]
        elts[i] += eps
        y_n = sumprod_logspace(elts, size)
        grad_num[i] = (log(y_n[correct]) - log(y[correct])) / eps
        elts[i] = tmp
    print("Numeric gradient: ")
    print(grad_num)
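
# Why grad = Cy - y (sketch of the derivation): E = log(z_i) - log(Z), and
#   d log(z_i) / dx_j = P(j in subset | i in subset) = Cy[j]
#   d log(Z)   / dx_j = P(j in subset)               = y[j]
# so dE/dx_j = Cy[j] - y[j].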


if __name__ == "__main__":
    nr.seed(2)
    N = 5       # The number of outputs in the softmax
    size = 2    # The size of the multisoft set
    fixed = -2  # Force this index to be on (negative = don't)
    elts = nr.randn(N)
    elts -= elts.max()
    # The random draw above is overridden by a fixed test vector.
    elts = np.array([-0.071459650993347, -0.517264485359192, -0.128548145294189,
                     -0.113207340240479, 0.000000000000000])
    print(elts)
    dp_y = sumprod_logspace(elts, size, fixed=fixed)
    bf_Z = sumprod_brute(elts, size, fixed=fixed)
    print("Brute force Z: %f" % bf_Z)
    print("Brute force z/Z:")
    bf_z = np.zeros(N)
    for i in range(N):
        # Subsets containing i: s elements from the left of i, the rest
        # from the right; shift `fixed` for the right-hand slice.
        for s in range(size):
            bf_z[i] += sumprod_brute(elts[:i], s, fixed=fixed) * \
                sumprod_brute(elts[i+1:], size - 1 - s, fixed=fixed-i-1)
        bf_z[i] *= exp(elts[i])
    print(bf_z / bf_Z)
    print("DP z/Z:")
    print(dp_y)
    check_grad(elts, size, correct=3)