TimeStep.py
import numpy as np


class TimeStep:
    """
    An iteration or time step 't' occurs each time the parameters lambda =
    (list1_conc0, list1_conc1) and the function of lambda being MAXIMIZED
    (the ELBO) are updated. This class calculates the change in lambda,
    delta lambda, for a single iteration at the current time t=cur_t.
    There are various possible methods for calculating delta lambda. A
    nice description of the various methods can be found in the Wikipedia
    article (cited below) on "Stochastic Gradient Descent" (in our case,
    it's an ascent, not a descent).

    In conventional Artificial Neural Net algorithms, one minimizes a
    Cost, so the change in Cost must be negative. Here, we are trying to
    maximize the ELBO, so the change in ELBO must be positive. In both
    cases, eta > 0 (eta is a scalar factor multiplying delta lambda).
    Replacing eta by -eta converts the time step for a Cost into one for
    the ELBO, or vice versa.

    References
    ----------
    https://en.wikipedia.org/wiki/Stochastic_gradient_descent

    """
    def __init__(self, method, eta, len1):
        """
        Constructor

        Parameters
        ----------
        method : str
            A string that identifies the method of calculating delta
            lambda. For example, method = 'adam'.
        eta : float
            Positive scalar; delta lambda is proportional to it.
        len1 : int
            Length of a list1.

        Returns
        -------
        None

        """
        self.method = method
        self.eta = eta
        # These buffers are filled in and reused by successive calls to
        # get_delta_conc(); one entry per layer k.
        self.list1_cum_grad = [None]*len1
        self.list1_cum_sq_grad = [None]*len1
    def get_delta_conc(self, grad0, grad1, cur_t, k):
        """
        Returns the change in lambda = concentrations (conc0, conc1).
        grad0 and grad1 are the gradients of ELBO at time t=cur_t with
        respect to conc0 and conc1, respectively. ELBO is being maximized.
        'k' is the layer being considered.

        Parameters
        ----------
        grad0 : np.array
            Gradient of ELBO with respect to conc0.
        grad1 : np.array
            Gradient of ELBO with respect to conc1.
        cur_t : int
            Current time step t.
        k : int
            Layer index.

        Returns
        -------
        np.array

        """
        method = self.method
        # Stack the two gradients so both concentrations are updated in
        # one array expression; grad has shape (2,) + grad0.shape.
        grad = np.stack([grad0, grad1])
        if method == 'naive':
            return self.eta*grad
        elif method == 'naive_t':
            # damp the step size as 1/(t+1)
            return (self.eta/(cur_t+1))*grad
        elif method == 'mag1_grad':
            # mag_grad = magnitude of gradient, combining grad0 and grad1
            mag_grad = np.sqrt(np.square(grad0) +
                               np.square(grad1))
            return np.divide(self.eta*grad, mag_grad)
        elif method == 'ada_grad':
            assert cur_t is not None
            if cur_t == 0:
                # first step: start accumulating squared gradients
                self.list1_cum_sq_grad[k] = np.square(grad)
            else:
                self.list1_cum_sq_grad[k] += np.square(grad)
            return np.divide(self.eta * grad,
                             np.sqrt(self.list1_cum_sq_grad[k]) + 1e-6)
        elif method == 'adam':
            # Simplified Adam-like rule: accumulates raw sums of the
            # gradient and squared gradient (no exponential decay or
            # bias correction, unlike textbook Adam).
            assert cur_t is not None
            if cur_t == 0:
                self.list1_cum_grad[k] = grad
                self.list1_cum_sq_grad[k] = np.square(grad)
            else:
                self.list1_cum_grad[k] += grad
                self.list1_cum_sq_grad[k] += np.square(grad)
            return np.divide(np.multiply(self.eta * grad,
                                         self.list1_cum_grad[k]),
                             np.sqrt(self.list1_cum_sq_grad[k]) + 1e-6)
        else:
            raise ValueError("unsupported time step method: " + str(method))
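

# A minimal usage sketch (not part of the original module). The layer
# count, gradient shapes, eta, and method choice below are illustrative
# assumptions only.
if __name__ == "__main__":
    np.random.seed(0)
    len1 = 3  # hypothetical number of layers
    ts = TimeStep(method='ada_grad', eta=0.1, len1=len1)
    for t in range(5):
        for k in range(len1):
            # stand-in gradients of ELBO w.r.t. conc0 and conc1, layer k
            grad0 = np.random.rand(4)
            grad1 = np.random.rand(4)
            delta = ts.get_delta_conc(grad0, grad1, t, k)
        print('t =', t, ', last delta shape:', delta.shape)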