25 changes: 23 additions & 2 deletions README.md
@@ -1,9 +1,30 @@
# tensorflow-vrnn
A variational recurrent neural network as described in:

Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988).
[Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988).](https://arxiv.org/abs/1506.02216)

## Requirements
python == 3.5
tensorflow == 1.2.1
numpy == 1.13.1

![VRNN Structure](graph1.png?raw=true "VRNN Structure")
## main.py
* Train the model:
```bash
python main.py
```
## cell.py
* Implementation of the **VRNNCell** structure

## utils.py
* Basic utility functions (`create_dir`, `pickle_save`)

## ops.py
* Basic TensorFlow operations (`fc_layer`, `get_shape`, `print_vars`)

## config.py
* Basic model configuration
* All hyperparameters can be changed here.

![VRNN Structure](graph1.png?raw=true "VRNN Structure")
![Global Structure](graph2.png?raw=true "Global Structure")
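For orientation, a minimal end-to-end sketch of how these pieces fit together (it mirrors what `main.py` already runs under `__main__`):

```python
from main import VRNN

model = VRNN()       # builds the graph with the defaults from config.py
model.initialize()   # runs tf.global_variables_initializer()
model.train()        # trains on synthetic sine data, checkpointing to ./save/
```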
69 changes: 69 additions & 0 deletions cell.py
@@ -0,0 +1,69 @@
from ops import fc_layer, get_shape, print_vars
import tensorflow as tf
import numpy as np

class VRNNCell(tf.nn.rnn_cell.RNNCell):
"""Variational RNN cell."""

def __init__(self, x_dim, h_dim, z_dim = 100):
'''
Args:
x_dim - chunk_samples
h_dim - rnn_size
z_dim - latent_size
'''
self.n_h = h_dim
self.n_x = x_dim
self.n_z = z_dim
self.n_x_1 = x_dim
self.n_z_1 = z_dim
self.n_enc_hidden = z_dim
self.n_dec_hidden = x_dim
self.n_prior_hidden = z_dim
self.lstm = tf.nn.rnn_cell.LSTMCell(self.n_h, state_is_tuple=True)

@property
def state_size(self):
return (self.n_h, self.n_h)

    @property
    def output_size(self):
        # NOTE: __call__ actually returns a 6-tuple of distribution parameters;
        # this nominal size is not enforced by the static_rnn call in main.py.
        return self.n_h

    def __call__(self, x, state, scope=None):
        '''
        Args:
            x - input 2D tensor [batch_size, 2*chunk_samples]
            state - LSTM state tuple
                (cell_state, hidden)
            scope - string
                defaults to None
        '''
        with tf.variable_scope(scope or type(self).__name__):
            c, h = state  # tf.nn.rnn_cell.LSTMStateTuple order is (c, h)
with tf.variable_scope("Prior"):
prior_hidden = fc_layer(h, self.n_prior_hidden, activation = tf.nn.relu, scope = "hidden")
prior_mu = fc_layer(prior_hidden, self.n_z, scope = "mu")
prior_sigma = fc_layer(prior_hidden, self.n_z, activation = tf.nn.softplus, scope = "sigma")# >=0

x_1 = fc_layer(x, self.n_x_1, activation = tf.nn.relu, scope = "phi_x")# >=0

with tf.variable_scope("Encoder"):
enc_hidden = fc_layer(tf.concat(values=(x_1, h), axis=1), self.n_enc_hidden, activation = tf.nn.relu, scope = "hidden")
enc_mu = fc_layer(enc_hidden, self.n_z, scope = 'mu')
enc_sigma = fc_layer(enc_hidden, self.n_z, activation = tf.nn.softplus, scope = 'sigma')

# Random sampling ~ N(0, 1)
eps = tf.random_normal((get_shape(x)[0], self.n_z), 0.0, 1.0, dtype=tf.float32)
            # z = mu + sigma*epsilon: latent sample via the reparameterization trick
z = tf.add(enc_mu, tf.multiply(enc_sigma, eps))
z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z")

with tf.variable_scope("Decoder"):
dec_hidden = fc_layer(tf.concat(values=(z_1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden")
dec_mu = fc_layer(dec_hidden, self.n_x, scope = "mu")
dec_sigma = fc_layer(dec_hidden, self.n_x, activation = tf.nn.softplus, scope = "sigma")

output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state)

return (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma), next_state
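For reviewers, a minimal sketch (not part of this PR) of how `VRNNCell` plugs into `tf.contrib.rnn.static_rnn`, mirroring what `main.py` does; the batch size, sequence length, and dimensions here are arbitrary assumptions:

```python
import tensorflow as tf
from cell import VRNNCell

cell = VRNNCell(x_dim=1, h_dim=3, z_dim=3)
# seq_length=5 input chunks, each [batch_size=4, 2*chunk_samples=2]
inputs = [tf.placeholder(tf.float32, [4, 2]) for _ in range(5)]
initial_state = cell.zero_state(batch_size=4, dtype=tf.float32)
outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=initial_state)
# each element of `outputs` is the 6-tuple
# (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma)
```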
22 changes: 22 additions & 0 deletions config.py
@@ -0,0 +1,22 @@
#=================================PATH=========================#

SAVE_DIR = './save/'

#======================VRNN configuration=======================#

class VRNNConfig(object):
def __init__(self):
self.rnn_size = 3 # num of hidden states in RNN
self.latent_size = 3 # size of latent space

self.seq_length = 100 # RNN sequence length
        self.chunk_samples = 1 # number of samples per MDCT chunk

self.num_epochs = 5
self.batch_size = 3000
self.n_batches = 100
self.log_every = 20

        self.grad_clip = 10 # clip gradients at this value (not yet applied in main.py)
self.decay_rate = 1.
self.lr = 0.0005 # initial learning_rate
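For reference, the config is a plain attribute bag (`main.py` gets the same attributes via inheritance); a minimal sketch of reading it:

```python
from config import VRNNConfig

cfg = VRNNConfig()
print(cfg.rnn_size, cfg.latent_size)   # 3 3
print(cfg.batch_size, cfg.seq_length)  # 3000 100
print(2 * cfg.chunk_samples)           # 2: width of each input/target chunk
```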
249 changes: 249 additions & 0 deletions main.py
@@ -0,0 +1,249 @@
from utils import create_dir, pickle_save
from config import SAVE_DIR, VRNNConfig
from datetime import datetime
from ops import print_vars
from cell import VRNNCell

import tensorflow as tf
import numpy as np
import logging
import pickle
import os

logging.basicConfig(format = "[%(asctime)s] %(message)s", datefmt="%m%d %H:%M:%S")
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

class VRNN(VRNNConfig):
def __init__(self, istest=False):
VRNNConfig.__init__(self)
logger.info("Building model starts...")
        def NLL(y, mu, sigma):
            '''Negative log-likelihood of y under a diagonal Gaussian:
                -log N(y; mu, sigma^2)
                    = 0.5 * sum( log(2*pi*sigma^2) + (y - mu)^2 / sigma^2 )
            Args:
                y - [batch_size x seq_length, 2*chunk_samples]
                mu - [batch_size x seq_length, chunk_samples]
                sigma - [batch_size x seq_length, chunk_samples]
            Return:
                NLL per example, [batch_size x seq_length]
            '''
            with tf.variable_scope('NLL'):
                sigma_square = tf.maximum(1e-10, tf.square(sigma))  # sigma^2, floored to avoid division by zero
                norm = tf.subtract(y[:, :self.chunk_samples], mu)   # y - mu
                z = tf.div(tf.square(norm), sigma_square)           # (y - mu)^2 / sigma^2
                denom_log = tf.log(2 * np.pi * sigma_square)        # log(2*pi*sigma^2)
                return 0.5 * tf.reduce_sum(z + denom_log, 1)

        def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2):
            '''
            Kullback-Leibler divergence between two diagonal Gaussians
            '''
            with tf.variable_scope("kl_gaussian"):
                return tf.reduce_sum(0.5 * (
                    2 * tf.log(tf.maximum(1e-9, sigma_2), name='log_sigma_2')
                    - 2 * tf.log(tf.maximum(1e-9, sigma_1), name='log_sigma_1')
                    + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9, tf.square(sigma_2)) - 1), 1)
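        # Closed form computed above, per latent dimension:
        #   KL(N(mu1, s1^2) || N(mu2, s2^2))
        #       = log(s2/s1) + (s1^2 + (mu1 - mu2)^2) / (2*s2^2) - 1/2
        # The 1e-9 floors keep the logs and the division numerically safe.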

def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y):
kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence loss
            likelihood_loss = NLL(y, dec_mu, dec_sigma) # negative log-likelihood loss
return tf.reduce_mean(kl_loss + likelihood_loss)

if istest:
self.batch_size = 1
self.seq_length = 1
logger.info("Building VRNNCell starts...")
self.cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size)
logger.info("Building VRNNCell done.")

# [batch_size, seq_length, chunk_samples*2]
self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data')
# [batch_size, seq_length, chunk_samples*2]
self.target_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name = 'target_data')
# [batch_size, rnn_size]
self.initial_state_c, self.initial_state_h = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)

with tf.variable_scope("inputs"):
inputs = tf.transpose(self.input_data, [1, 0, 2]) # [seq_length, batch_size, 2*chunk_samples]
inputs = tf.reshape(inputs, [-1, 2*self.chunk_samples]) # [seq_length*batch_size, 2*chunk_samples]
inputs = tf.split(axis=0, num_or_size_splits=self.seq_length, value=inputs) # seq_length * [batch_size, 2*chunk_samples]

# [batch_size* seq_length, chunk_samples*2]
self.target = tf.reshape(self.target_data, [-1, 2*self.chunk_samples])

outputs, last_state = tf.contrib.rnn.static_rnn(self.cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h))
        # outputs: list of length seq_length, each a 6-tuple of [batch_size, chunk_samples] tensors
outputs_reshape = []
names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"]

for n, name in enumerate(names):
with tf.variable_scope(name):
x = tf.stack([o[n] for o in outputs]) # [seq_length, batch_size, chunk_samples]
x = tf.transpose(x,[1,0,2]) # [batch_size, seq_length, chunk_samples]
x = tf.reshape(x, [self.batch_size*self.seq_length, -1]) # [batch_size x seq_length, chunk_samples]
outputs_reshape.append(x)
# tuple*[batch_size x seq_length, chunk_samples]
enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma = outputs_reshape
self.mu = dec_mu
self.sigma = dec_sigma

self.final_state_c, self.final_state_h = last_state
self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, self.target)

print_vars("trainable_variables")
        self.initial_lr = self.lr  # keep the initial rate; self.lr becomes a tf.Variable below
        self.lr = tf.Variable(self.lr, trainable=False)
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cost)
logger.info("Building model done.")

self.sess = tf.Session()

def next_batch(self):
'''
        3D signal
            [batch_axis, time_axis, chunk_axis]
            = shared noise + per-element noise + sin(2*pi*(t/10 + t_offset))

        The second half of the chunk_axis is always zero.

Return:
x, y
x - 3D ndarray
[self.batch_size, self.seq_length, 2*self.chunk_samples]
y - 3D ndarray
[self.batch_size, self.seq_length, 2*self.chunk_samples]

'''
t_offset = np.random.randn(self.batch_size, 1, (2 * self.chunk_samples))
mixed_noise = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.01

x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset))
y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t_offset))

y[:, :, self.chunk_samples:] = 0.
x[:, :, self.chunk_samples:] = 0.
return x, y
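        # Note: y is the same underlying sine advanced by one time step
        # (next-step prediction targets), drawn with its own observation noise.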

def initialize(self):
logger.info("Initialization of parameters")
self.sess.run(tf.global_variables_initializer())

def restore(self):
saver = tf.train.Saver(tf.global_variables())
ckpt = tf.train.get_checkpoint_state(SAVE_DIR)
print("Load the model from {}".format(ckpt.model_checkpoint_path))
saver.restore(self.sess, ckpt.model_checkpoint_path)

def train(self):
create_dir(SAVE_DIR)
ckpt = tf.train.get_checkpoint_state(SAVE_DIR)
saver = tf.train.Saver(tf.global_variables())

if ckpt:
saver.restore(self.sess, ckpt.model_checkpoint_path)
print("Load the model from %s"%ckpt.model_checkpoint_path)

iteration = 0
for epoch in range(self.num_epochs):
            # Learning rate decay (from the initial rate, not compounding)
            self.sess.run(tf.assign(self.lr, self.initial_lr * (self.decay_rate ** epoch)))

for batch in range(self.n_batches):
x, y = self.next_batch()
                feed_dict = {self.input_data: x, self.target_data: y}
                train_loss, _, sigma = self.sess.run([self.cost, self.train_op, self.sigma], feed_dict=feed_dict)

iteration+=1
if iteration % self.log_every == 0 and iteration > 0:
print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(iteration, self.num_epochs * self.n_batches, epoch+1, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0)))
checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt')
saver.save(self.sess, checkpoint_path, global_step=iteration)
logger.info("model saved to {}".format(checkpoint_path))


    def sample(self, num=4410, start=None):
        '''
        Args:
            num - int
                number of chunks to generate (default 4410)
            start - optional seed sequence
                None => start from random noise [1, 1, 2*self.chunk_samples]
                1D array [2*self.chunk_samples] => start from that single chunk
                2D array [seq, 2*self.chunk_samples] => warm up the state on the
                    sequence, then generate from its last chunk
        Return:
            chunks - [num, 2*self.chunk_samples] generated samples
            mus    - [num, self.chunk_samples] decoder means
            sigmas - [num, self.chunk_samples] decoder standard deviations
        '''
def sample_gaussian(mu, sigma):
return mu + (sigma*np.random.randn(*sigma.shape))

# Initial condition
prev_state = self.sess.run(self.cell.zero_state(1, tf.float32)) # [batch_size, rnn_size]

if start is None:
prev_x = np.random.randn(1, 1, 2*self.chunk_samples)
elif len(start.shape) == 1:
prev_x = start[np.newaxis,np.newaxis,:]
        elif len(start.shape) == 2:
            # Warm up the recurrent state on every chunk of the seed except the last
            for i in range(start.shape[0] - 1):
                prev_x = start[i, :][np.newaxis, np.newaxis, :]  # [1, 1, 2*self.chunk_samples]

                feed_dict = {
                    self.input_data : prev_x,
                    self.initial_state_c : prev_state[0],
                    self.initial_state_h : prev_state[1]
                }

                _, _, prev_state_c, prev_state_h = self.sess.run(
                    [self.mu, self.sigma, self.final_state_c, self.final_state_h],
                    feed_dict=feed_dict
                )
                prev_state = prev_state_c, prev_state_h

            prev_x = start[-1, :][np.newaxis, np.newaxis, :]  # generate from the last seed chunk

chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32)
mus = np.zeros((num, self.chunk_samples), dtype=np.float32)
sigmas = np.zeros((num, self.chunk_samples), dtype=np.float32)

for i in range(num):
feed_dict = {
self.input_data : prev_x,
self.initial_state_c : prev_state[0],
self.initial_state_h : prev_state[1]
}

[o_mu, o_sigma, next_state_c, next_state_h] = self.sess.run(
[self.mu, self.sigma, self.final_state_c, self.final_state_h],
feed_dict = feed_dict
)
next_x = np.hstack(
(
sample_gaussian(o_mu, o_sigma), np.zeros((1, self.chunk_samples))
)
) # [1, 2*self.chunk_samples]
chunks[i] = next_x
mus[i] = o_mu
sigmas[i] = o_sigma

prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32)
prev_x[0] = next_x
prev_state = next_state_c, next_state_h

return chunks, mus, sigmas

if __name__ == '__main__':
model = VRNN()
model.initialize()
model.train()
'''
Test code
model2 = VRNN(True)
model2.restore()
print(model2.sample())
'''
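A minimal sketch of the commented-out test path above (assumption: a checkpoint already exists under `./save/`):

```python
from main import VRNN

model = VRNN(istest=True)   # batch_size = seq_length = 1 for generation
model.restore()             # loads the latest checkpoint from SAVE_DIR
chunks, mus, sigmas = model.sample(num=100)
print(chunks.shape, mus.shape, sigmas.shape)  # (100, 2), (100, 1), (100, 1)
```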