diff --git a/README.md b/README.md index a5e8b3b..4bd3741 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,30 @@ # tensorflow-vrnn A variational recurrent neural network as described in: -Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988). +[Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988).](https://arxiv.org/abs/1506.02216) +## Requirements +python == 3.5 +tensorflow == 1.2.1 +numpy == 1.13.1 -![VRNN Structure](graph1.png?raw=true "VRNN Structure") +## main.py +* Train the model +```bash +python main.py +``` +## cell.py +* **VRNNCell** structure + +## utils.py +* Basic functions implementation + +## ops.py +* Basic operations based on tensorflow +## config.py +* Basic configuration of the model +* Every configuration can be changed here. 
+ +![VRNN Structure](graph1.png?raw=true "VRNN Structure") ![Global Structure](graph2.png?raw=true "Global Structure") diff --git a/cell.py b/cell.py new file mode 100644 index 0000000..f571847 --- /dev/null +++ b/cell.py @@ -0,0 +1,69 @@ +from ops import fc_layer, get_shape, print_vars +import tensorflow as tf +import numpy as np + +class VRNNCell(tf.nn.rnn_cell.RNNCell): + """Variational RNN cell.""" + + def __init__(self, x_dim, h_dim, z_dim = 100): + ''' + Args: + x_dim - chunk_samples + h_dim - rnn_size + z_dim - latent_size + ''' + self.n_h = h_dim + self.n_x = x_dim + self.n_z = z_dim + self.n_x_1 = x_dim + self.n_z_1 = z_dim + self.n_enc_hidden = z_dim + self.n_dec_hidden = x_dim + self.n_prior_hidden = z_dim + self.lstm = tf.nn.rnn_cell.LSTMCell(self.n_h, state_is_tuple=True) + + @property + def state_size(self): + return (self.n_h, self.n_h) + + @property + def output_size(self): + return self.n_h + + def __call__(self, x, state, scope=None): + ''' + Args: + x - input 2D tensor [batch_size x 2*self.chunk_samples] + state - tuple + (hidden, cell_state) + scope - string + defaults to be None + ''' + with tf.variable_scope(scope or type(self).__name__): + h, c = state + with tf.variable_scope("Prior"): + prior_hidden = fc_layer(h, self.n_prior_hidden, activation = tf.nn.relu, scope = "hidden") + prior_mu = fc_layer(prior_hidden, self.n_z, scope = "mu") + prior_sigma = fc_layer(prior_hidden, self.n_z, activation = tf.nn.softplus, scope = "sigma")# >=0 + + x_1 = fc_layer(x, self.n_x_1, activation = tf.nn.relu, scope = "phi_x")# >=0 + + with tf.variable_scope("Encoder"): + enc_hidden = fc_layer(tf.concat(values=(x_1, h), axis=1), self.n_enc_hidden, activation = tf.nn.relu, scope = "hidden") + enc_mu = fc_layer(enc_hidden, self.n_z, scope = 'mu') + enc_sigma = fc_layer(enc_hidden, self.n_z, activation = tf.nn.softplus, scope = 'sigma') + + # Random sampling ~ N(0, 1) + eps = tf.random_normal((get_shape(x)[0], self.n_z), 0.0, 1.0, dtype=tf.float32) + # z = mu + 
sigma*epsilon, latent variable from reparametrization trick + z = tf.add(enc_mu, tf.multiply(enc_sigma, eps)) + z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z") + + with tf.variable_scope("Decoder"): + dec_hidden = fc_layer(tf.concat(values=(z_1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") + dec_mu = fc_layer(dec_hidden, self.n_x, scope = "mu") + dec_sigma = fc_layer(dec_hidden, self.n_x, activation = tf.nn.softplus, scope = "sigma") + + output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state) + + return (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma), next_state diff --git a/config.py b/config.py new file mode 100644 index 0000000..f2a4d8e --- /dev/null +++ b/config.py @@ -0,0 +1,22 @@ +#=================================PATH=========================# + +SAVE_DIR = './save/' + +#======================VRNN configuration=======================# + +class VRNNConfig(object): + def __init__(self): + self.rnn_size = 3 # num of hidden states in RNN + self.latent_size = 3 # size of latent space + + self.seq_length = 100 # RNN sequence length + self.chunk_samples = 1 # number of samples per mdct chunk + + self.num_epochs = 5 + self.batch_size = 3000 + self.n_batches = 100 + self.log_every = 20 + + self.grad_clip = 10 # clip gradients at this value + self.decay_rate = 1. 
+ self.lr = 0.0005 # initial learning_rate diff --git a/main.py b/main.py new file mode 100644 index 0000000..5216979 --- /dev/null +++ b/main.py @@ -0,0 +1,249 @@ +from utils import create_dir, pickle_save +from config import SAVE_DIR, VRNNConfig +from datetime import datetime +from ops import print_vars +from cell import VRNNCell + +import tensorflow as tf +import numpy as np +import logging +import pickle +import os + +logging.basicConfig(format = "[%(asctime)s] %(message)s", datefmt="%m%d %H:%M:%S") +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +class VRNN(VRNNConfig): + def __init__(self, istest=False): + VRNNConfig.__init__(self) + logger.info("Building model starts...") + def NLL(y, mu, sigma): + '''Negative LogLiklihood + - log(1/sqrt(2*pi)e-(y-mu)^2/2/sigma^2) + = + 1/2*(log(2*pi)+ (y-mu)^2/2/sigma^2) + Args : + y - [batch_size x seq_length, 2*chunk_samples] + mu - [batch_size x seq_length, chunk_samples] + sigma - [batch_size x seq_length, chunk_samples] + return + NLL + ''' + with tf.variable_scope('NLL'): + sigma_square = tf.maximum(1e-10, tf.square(sigma)) # sigma^2, avoid to be zero + norm = tf.subtract(y[:,:self.chunk_samples], mu) # x-\mu + z = tf.div(tf.square(norm), sigma_square) # (x-\mu)^2/sigma^2 + denom_log = tf.log(2*np.pi*sigma_square) + return 0.5*tf.reduce_sum(z+denom_log, 1) + + def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): + ''' + Kullback leibler divergence for two gaussian distributions + ''' + with tf.variable_scope("kl_gaussisan"): + return tf.reduce_sum(0.5 * ( + 2 * tf.log(tf.maximum(1e-9, sigma_2),name='log_sigma_2') + - 2 * tf.log(tf.maximum(1e-9, sigma_1),name='log_sigma_1') + + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1), 1) + + def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y): + kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence loss + likelihood_loss = NLL(y, dec_mu, dec_sigma) # Negative log 
liklihood loss + return tf.reduce_mean(kl_loss + likelihood_loss) + + if istest: + self.batch_size = 1 + self.seq_length = 1 + logger.info("Building VRNNCell starts...") + self.cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size) + logger.info("Building VRNNCell done.") + + # [batch_size, seq_length, chunk_samples*2] + self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data') + # [batch_size, seq_length, chunk_samples*2] + self.target_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name = 'target_data') + # [batch_size, rnn_size] + self.initial_state_c, self.initial_state_h = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) + + with tf.variable_scope("inputs"): + inputs = tf.transpose(self.input_data, [1, 0, 2]) # [seq_length, batch_size, 2*chunk_samples] + inputs = tf.reshape(inputs, [-1, 2*self.chunk_samples]) # [seq_length*batch_size, 2*chunk_samples] + inputs = tf.split(axis=0, num_or_size_splits=self.seq_length, value=inputs) # seq_length * [batch_size, 2*chunk_samples] + + # [batch_size* seq_length, chunk_samples*2] + self.target = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) + + outputs, last_state = tf.contrib.rnn.static_rnn(self.cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) + # outputs seq_length*tuple*[batch_size, chunk_samples] + outputs_reshape = [] + names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] + + for n, name in enumerate(names): + with tf.variable_scope(name): + x = tf.stack([o[n] for o in outputs]) # [seq_length, batch_size, chunk_samples] + x = tf.transpose(x,[1,0,2]) # [batch_size, seq_length, chunk_samples] + x = tf.reshape(x, [self.batch_size*self.seq_length, -1]) # [batch_size x seq_length, chunk_samples] + outputs_reshape.append(x) + # tuple*[batch_size x seq_length, chunk_samples] + enc_mu, enc_sigma, 
dec_mu, dec_sigma, prior_mu, prior_sigma = outputs_reshape + self.mu = dec_mu + self.sigma = dec_sigma + + self.final_state_c, self.final_state_h = last_state + self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, self.target) + + print_vars("trainable_variables") + self.lr = tf.Variable(self.lr, trainable = False) + self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cost) + logger.info("Building model done.") + + self.sess = tf.Session() + + def next_batch(self): + ''' + 3D signal + [batch_axis, time_axis, chunk_axis] + = common noise + noise + sin(time_axis[:] + time_offset) + + half of the chunk_axis are all zeros + + Return: + x, y + x - 3D ndarray + [self.batch_size, self.seq_length, 2*self.chunk_samples] + y - 3D ndarray + [self.batch_size, self.seq_length, 2*self.chunk_samples] + + ''' + t_offset = np.random.randn(self.batch_size, 1, (2 * self.chunk_samples)) + mixed_noise = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.01 + + x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + + y[:, :, self.chunk_samples:] = 0. + x[:, :, self.chunk_samples:] = 0. 
+ return x, y + + def initialize(self): + logger.info("Initialization of parameters") + self.sess.run(tf.global_variables_initializer()) + + def restore(self): + saver = tf.train.Saver(tf.global_variables()) + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) + print("Load the model from {}".format(ckpt.model_checkpoint_path)) + saver.restore(self.sess, ckpt.model_checkpoint_path) + + def train(self): + create_dir(SAVE_DIR) + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) + saver = tf.train.Saver(tf.global_variables()) + + if ckpt: + saver.restore(self.sess, ckpt.model_checkpoint_path) + print("Load the model from %s"%ckpt.model_checkpoint_path) + + iteration = 0 + for epoch in range(self.num_epochs): + # Learning rate decay + self.sess.run(tf.assign(self.lr, self.lr * (self.decay_rate ** epoch))) + + for batch in range(self.n_batches): + x, y = self.next_batch() + feed_dict = {model.input_data: x, model.target_data: y} + train_loss, _, sigma= self.sess.run([self.cost, self.train_op, self.sigma], feed_dict = feed_dict) + + iteration+=1 + if iteration % self.log_every == 0 and iteration > 0: + print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(iteration, self.num_epochs * self.n_batches, epoch+1, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt') + saver.save(self.sess, checkpoint_path, global_step=iteration) + logger.info("model saved to {}".format(checkpoint_path)) + + + def sample(self, num=4410, start=None): + ''' + Args : + num - int + 4410 + start - sequence + None => generate [1, 1, 2*self.chunk_samples] + start.shape==1 => generate [1, 1, 2*self.chunk_samples] + start.shape==2 [seq, 2*self.chunk_samples] + => generate( + Return : + chunks - + mus - + sigmas - + ''' + def sample_gaussian(mu, sigma): + return mu + (sigma*np.random.randn(*sigma.shape)) + + # Initial condition + prev_state = self.sess.run(self.cell.zero_state(1, tf.float32)) # [batch_size, rnn_size] + + if start 
is None: + prev_x = np.random.randn(1, 1, 2*self.chunk_samples) + elif len(start.shape) == 1: + prev_x = start[np.newaxis,np.newaxis,:] + elif len(start.shape) == 2: + for i in range(start.shape[0]-1): + prev_x = start[i,:] # [2*self.chunk_samples] + prev_x = prev_x[np.newaxis,np.newaxis,:] #[1, 1, 2*self.chunk_samples] + + feed_dict = { + self.input_data : prev_x, + self.initial_state_c : prev_state[0], + self.initial_state_h : prev_state[1] + } + + [prev_state_c, prev_state_h] = self.sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict=feed_dict + ) + prev_state = prev_state_c, prev_state_h + + prev_x = start[-1,:] # [2*self.chunk_samples] + prev_x = prev_x[np.newaxis,np.newaxis,:] # [1,1,2*self.chunk_samples] + + chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32) + mus = np.zeros((num, self.chunk_samples), dtype=np.float32) + sigmas = np.zeros((num, self.chunk_samples), dtype=np.float32) + + for i in range(num): + feed_dict = { + self.input_data : prev_x, + self.initial_state_c : prev_state[0], + self.initial_state_h : prev_state[1] + } + + [o_mu, o_sigma, next_state_c, next_state_h] = self.sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict = feed_dict + ) + next_x = np.hstack( + ( + sample_gaussian(o_mu, o_sigma), np.zeros((1, self.chunk_samples)) + ) + ) # [1, 2*self.chunk_samples] + chunks[i] = next_x + mus[i] = o_mu + sigmas[i] = o_sigma + + prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32) + prev_x[0] = next_x + prev_state = next_state_c, next_state_h + + return chunks, mus, sigmas + +if __name__ == '__main__': + model = VRNN() + model.initialize() + model.train() + ''' + Test code + model2 = VRNN(True) + model2.restore() + print(model2.sample()) + ''' diff --git a/model_vrnn.py b/model_vrnn.py deleted file mode 100644 index e3986fa..0000000 --- a/model_vrnn.py +++ /dev/null @@ -1,229 +0,0 @@ -import tensorflow as tf -import numpy as np - -def linear(input_, 
output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): - shape = input_.get_shape().as_list() - - with tf.variable_scope(scope or "Linear"): - matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32, - tf.random_normal_initializer(stddev=stddev)) - bias = tf.get_variable("bias", [output_size], - initializer=tf.constant_initializer(bias_start)) - if with_w: - return tf.matmul(input_, matrix) + bias, matrix, bias - else: - return tf.matmul(input_, matrix) + bias - -class VartiationalRNNCell(tf.contrib.rnn.RNNCell): - """Variational RNN cell.""" - - def __init__(self, x_dim, h_dim, z_dim = 100): - self.n_h = h_dim - self.n_x = x_dim - self.n_z = z_dim - self.n_x_1 = x_dim - self.n_z_1 = z_dim - self.n_enc_hidden = z_dim - self.n_dec_hidden = x_dim - self.n_prior_hidden = z_dim - self.lstm = tf.contrib.rnn.LSTMCell(self.n_h, state_is_tuple=True) - - - @property - def state_size(self): - return (self.n_h, self.n_h) - - @property - def output_size(self): - return self.n_h - - def __call__(self, x, state, scope=None): - with tf.variable_scope(scope or type(self).__name__): - h, c = state - - with tf.variable_scope("Prior"): - with tf.variable_scope("hidden"): - prior_hidden = tf.nn.relu(linear(h, self.n_prior_hidden)) - with tf.variable_scope("mu"): - prior_mu = linear(prior_hidden, self.n_z) - with tf.variable_scope("sigma"): - prior_sigma = tf.nn.softplus(linear(prior_hidden, self.n_z)) - - with tf.variable_scope("phi_x"): - x_1 = tf.nn.relu(linear(x, self.n_x_1)) - - with tf.variable_scope("Encoder"): - with tf.variable_scope("hidden"): - enc_hidden = tf.nn.relu(linear(tf.concat(axis=1,values=(x_1, h)), self.n_enc_hidden)) - with tf.variable_scope("mu"): - enc_mu = linear(enc_hidden, self.n_z) - with tf.variable_scope("sigma"): - enc_sigma = tf.nn.softplus(linear(enc_hidden, self.n_z)) - eps = tf.random_normal((x.get_shape().as_list()[0], self.n_z), 0.0, 1.0, dtype=tf.float32) - # z = mu + sigma*epsilon - z = tf.add(enc_mu, 
tf.multiply(enc_sigma, eps)) - with tf.variable_scope("phi_z"): - z_1 = tf.nn.relu(linear(z, self.n_z_1)) - - with tf.variable_scope("Decoder"): - with tf.variable_scope("hidden"): - dec_hidden = tf.nn.relu(linear(tf.concat(axis=1,values=(z_1, h)), self.n_dec_hidden)) - with tf.variable_scope("mu"): - dec_mu = linear(dec_hidden, self.n_x) - with tf.variable_scope("sigma"): - dec_sigma = tf.nn.softplus(linear(dec_hidden, self.n_x)) - with tf.variable_scope("rho"): - dec_rho = tf.nn.sigmoid(linear(dec_hidden, self.n_x)) - - - output, state2 = self.lstm(tf.concat(axis=1,values=(x_1, z_1)), state) - return (enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma), state2 - - - - -class VRNN(): - def __init__(self, args, sample=False): - - def tf_normal(y, mu, s, rho): - with tf.variable_scope('normal'): - ss = tf.maximum(1e-10,tf.square(s)) - norm = tf.subtract(y[:,:args.chunk_samples], mu) - z = tf.div(tf.square(norm), ss) - denom_log = tf.log(2*np.pi*ss, name='denom_log') - result = tf.reduce_sum(z+denom_log, 1)/2# - - #(tf.log(tf.maximum(1e-20,rho),name='log_rho')*(1+y[:,args.chunk_samples:]) - # +tf.log(tf.maximum(1e-20,1-rho),name='log_rho_inv')*(1-y[:,args.chunk_samples:]))/2, 1) - - return result - - def tf_kl_gaussgauss(mu_1, sigma_1, mu_2, sigma_2): - with tf.variable_scope("kl_gaussgauss"): - return tf.reduce_sum(0.5 * ( - 2 * tf.log(tf.maximum(1e-9,sigma_2),name='log_sigma_2') - - 2 * tf.log(tf.maximum(1e-9,sigma_1),name='log_sigma_1') - + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1 - ), 1) - - def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma, y): - kl_loss = tf_kl_gaussgauss(enc_mu, enc_sigma, prior_mu, prior_sigma) - likelihood_loss = tf_normal(y, dec_mu, dec_sigma, dec_rho) - - return tf.reduce_mean(kl_loss + likelihood_loss) - #return tf.reduce_mean(likelihood_loss) - - self.args = args - if sample: - args.batch_size = 1 - args.seq_length = 1 - - cell = 
VartiationalRNNCell(args.chunk_samples, args.rnn_size, args.latent_size) - - self.cell = cell - - self.input_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name='input_data') - self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples],name = 'target_data') - self.initial_state_c, self.initial_state_h = cell.zero_state(batch_size=args.batch_size, dtype=tf.float32) - - - # input shape: (batch_size, n_steps, n_input) - with tf.variable_scope("inputs"): - inputs = tf.transpose(self.input_data, [1, 0, 2]) # permute n_steps and batch_size - inputs = tf.reshape(inputs, [-1, 2*args.chunk_samples]) # (n_steps*batch_size, n_input) - - # Split data because rnn cell needs a list of inputs for the RNN inner loop - inputs = tf.split(axis=0, num_or_size_splits=args.seq_length, value=inputs) # n_steps * (batch_size, n_hidden) - flat_target_data = tf.reshape(self.target_data,[-1, 2*args.chunk_samples]) - - self.target = flat_target_data - self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs),[1,0,2]),[args.batch_size*args.seq_length, -1]) - self.input = tf.stack(inputs) - # Get vrnn cell output - outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c,self.initial_state_h)) - #print outputs - #outputs = map(tf.pack,zip(*outputs)) - outputs_reshape = [] - names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "dec_rho", "prior_mu", "prior_sigma"] - for n,name in enumerate(names): - with tf.variable_scope(name): - x = tf.stack([o[n] for o in outputs]) - x = tf.transpose(x,[1,0,2]) - x = tf.reshape(x,[args.batch_size*args.seq_length, -1]) - outputs_reshape.append(x) - - enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma = outputs_reshape - self.final_state_c,self.final_state_h = last_state - self.mu = dec_mu - self.sigma = dec_sigma - self.rho = dec_rho - - lossfunc = get_lossfunc(enc_mu, enc_sigma, dec_mu, 
dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) - self.sigma = dec_sigma - self.mu = dec_mu - with tf.variable_scope('cost'): - self.cost = lossfunc - tf.summary.scalar('cost', self.cost) - tf.summary.scalar('mu', tf.reduce_mean(self.mu)) - tf.summary.scalar('sigma', tf.reduce_mean(self.sigma)) - - - self.lr = tf.Variable(0.0, trainable=False) - tvars = tf.trainable_variables() - for t in tvars: - print t.name - grads = tf.gradients(self.cost, tvars) - #grads = tf.cond( - # tf.global_norm(grads) > 1e-20, - # lambda: tf.clip_by_global_norm(grads, args.grad_clip)[0], - # lambda: grads) - optimizer = tf.train.AdamOptimizer(self.lr) - self.train_op = optimizer.apply_gradients(zip(grads, tvars)) - #self.saver = tf.train.Saver(tf.all_variables()) - - def sample(self, sess, args, num=4410, start=None): - - def sample_gaussian(mu, sigma): - return mu + (sigma*np.random.randn(*sigma.shape)) - - if start is None: - prev_x = np.random.randn(1, 1, 2*args.chunk_samples) - elif len(start.shape) == 1: - prev_x = start[np.newaxis,np.newaxis,:] - elif len(start.shape) == 2: - for i in range(start.shape[0]-1): - prev_x = start[i,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - - [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( - [self.mu, self.sigma, self.rho, - self.final_state_c,self.final_state_h],feed) - - prev_x = start[-1,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - - prev_state = sess.run(self.cell.zero_state(1, tf.float32)) - chunks = np.zeros((num, 2*args.chunk_samples), dtype=np.float32) - mus = np.zeros((num, args.chunk_samples), dtype=np.float32) - sigmas = np.zeros((num, args.chunk_samples), dtype=np.float32) - - for i in xrange(num): - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, o_rho, next_state_c, next_state_h] = sess.run([self.mu, self.sigma, - 
self.rho, self.final_state_c, self.final_state_h],feed) - - next_x = np.hstack((sample_gaussian(o_mu, o_sigma), - 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) - chunks[i] = next_x - mus[i] = o_mu - sigmas[i] = o_sigma - - prev_x = np.zeros((1, 1, 2*args.chunk_samples), dtype=np.float32) - prev_x[0][0] = next_x - prev_state = next_state_c, next_state_h - - return chunks, mus, sigmas diff --git a/ops.py b/ops.py new file mode 100644 index 0000000..87fcad4 --- /dev/null +++ b/ops.py @@ -0,0 +1,42 @@ +import tensorflow as tf + +def get_shape(tensor): + '''return the shape of tensor as list''' + return tensor.get_shape().as_list() + +def print_vars(string): + '''print variables in collection named string''' + print("Collection name %s"%string) + print(" "+"\n ".join(["{} : {}".format(v.name, get_shape(v)) for v in tf.get_collection(string)])) + +def fc_layer(input_, output_size, activation = None, batch_norm = False, istrain = False, scope = None): + ''' + fully convlolution layer + Args : + input_ - 2D tensor + general shape : [batch, input_size] + output_size - int + shape of output 2D tensor + activation - activation function + defaults to be None + batch_norm - bool + defaults to be False + if batch_norm to apply batch_normalization + istrain - bool + defaults to be False + indicator for phase train or not + scope - string + defaults to be None then scope becomes "fc" + ''' + with tf.variable_scope(scope or "fc"): + w = tf.get_variable(name="w", shape = [get_shape(input_)[1], output_size], initializer=tf.contrib.layers.xavier_initializer()) + if batch_norm: + norm = tf.contrib.layers.batch_norm(tf.matmul(input_, w) , center=True, scale=True, decay = 0.8, is_training=istrain, scope='batch_norm') + if activation is None: + return norm + return activation(norm) + else: + b = tf.get_variable(name="b", shape = [output_size], initializer=tf.constant_initializer(0.01)) + if activation is None: + return tf.nn.xw_plus_b(input_, w, b) + return 
activation(tf.nn.xw_plus_b(input_, w, b)) diff --git a/sample_vrnn.py b/sample_vrnn.py deleted file mode 100644 index b8e4bc2..0000000 --- a/sample_vrnn.py +++ /dev/null @@ -1,21 +0,0 @@ -import tensorflow as tf - -import os -import cPickle -from model_vrnn import VRNN -import numpy as np - -from train_vrnn import next_batch - -with open(os.path.join('save-vrnn', 'config.pkl')) as f: - saved_args = cPickle.load(f) - -model = VRNN(saved_args, True) -sess = tf.InteractiveSession() -saver = tf.train.Saver(tf.all_variables()) - -ckpt = tf.train.get_checkpoint_state('save-vrnn') -print "loading model: ",ckpt.model_checkpoint_path - -saver.restore(sess, ckpt.model_checkpoint_path) -sample_data,mus,sigmas = model.sample(sess,saved_args) diff --git a/train_vrnn.py b/train_vrnn.py deleted file mode 100644 index c79f8b7..0000000 --- a/train_vrnn.py +++ /dev/null @@ -1,110 +0,0 @@ -import numpy as np -import tensorflow as tf - -import argparse -import glob -import time -from datetime import datetime -import os -import cPickle - -from model_vrnn import VRNN - -from matplotlib import pyplot as plt - -''' -TODOS: - - parameters for depth and width of hidden layers - - implement predict function - - separate binary and gaussian variables - - clean up nomenclature to remove MDCT references - - implement separate MDCT training and sampling version -''' - -def next_batch(args): - t0 = np.random.randn(args.batch_size, 1, (2 * args.chunk_samples)) - mixed_noise = np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - #x = t0 + mixed_noise + np.random.randn( - # args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - #y = t0 + mixed_noise + np.random.randn( - # args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - x = np.sin(2 * np.pi * (np.arange(args.seq_length)[np.newaxis, :, np.newaxis] / 10. 
+ t0)) + np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + mixed_noise*0.1 - y = np.sin(2 * np.pi * (np.arange(1, args.seq_length + 1)[np.newaxis, :, np.newaxis] / 10. + t0)) + np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + mixed_noise*0.1 - - y[:, :, args.chunk_samples:] = 0. - x[:, :, args.chunk_samples:] = 0. - return x, y - - -def train(args, model): - dirname = 'save-vrnn' - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(os.path.join(dirname, 'config.pkl'), 'w') as f: - cPickle.dump(args, f) - - ckpt = tf.train.get_checkpoint_state(dirname) - n_batches = 100 - with tf.Session() as sess: - summary_writer = tf.summary.FileWriter('logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph) - check = tf.add_check_numerics_ops() - merged = tf.summary.merge_all() - tf.global_variables_initializer().run() - saver = tf.train.Saver(tf.global_variables()) - if ckpt: - saver.restore(sess, ckpt.model_checkpoint_path) - print "Loaded model" - start = time.time() - for e in xrange(args.num_epochs): - sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) - state = model.initial_state_c, model.initial_state_h - for b in xrange(n_batches): - x, y = next_batch(args) - feed = {model.input_data: x, model.target_data: y} - train_loss, _, cr, summary, sigma, mu, input, target= sess.run( - [model.cost, model.train_op, check, merged, model.sigma, model.mu, model.flat_input, model.target], - feed) - summary_writer.add_summary(summary, e * n_batches + b) - if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): - checkpoint_path = os.path.join(dirname, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=e * n_batches + b) - print "model saved to {}".format(checkpoint_path) - end = time.time() - print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \ - .format(e * n_batches + b, - args.num_epochs * 
n_batches, - e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0)) - start = time.time() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--rnn_size', type=int, default=3, - help='size of RNN hidden state') - parser.add_argument('--latent_size', type=int, default=3, - help='size of latent space') - parser.add_argument('--batch_size', type=int, default=3000, - help='minibatch size') - parser.add_argument('--seq_length', type=int, default=100, - help='RNN sequence length') - parser.add_argument('--num_epochs', type=int, default=100, - help='number of epochs') - parser.add_argument('--save_every', type=int, default=500, - help='save frequency') - parser.add_argument('--grad_clip', type=float, default=10., - help='clip gradients at this value') - parser.add_argument('--learning_rate', type=float, default=0.0005, - help='learning rate') - parser.add_argument('--decay_rate', type=float, default=1., - help='decay of learning rate') - parser.add_argument('--chunk_samples', type=int, default=1, - help='number of samples per mdct chunk') - args = parser.parse_args() - - model = VRNN(args) - - train(args, model) diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..64e46af --- /dev/null +++ b/utils.py @@ -0,0 +1,17 @@ +import os +import pickle + +def create_dir(dirname): + if not os.path.exists(dirname): + os.makedirs(dirname) + +def pickle_load(path): + '''Load the picke data from path''' + with open(path, 'rb') as f: + loaded_pickle = pickle.load(f) + return loaded_pickle + +def pickle_save(content, path): + '''Save the content on the path''' + with open(path, 'wb') as f: + pickle.dump(content, f)