diff --git a/README.md b/README.md index a5e8b3b..4bd3741 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,30 @@ # tensorflow-vrnn A variational recurrent neural network as described in: -Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988). +[Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988).](https://arxiv.org/abs/1506.02216) +## Requirements +python == 3.5 +tensorflow == 1.2.1 +numpy == 1.13.1 -![VRNN Structure](graph1.png?raw=true "VRNN Structure") +## main.py +* Train the model +```bash +python main.py +``` +## cell.py +* **VRNNCell** structure + +## utils.py +* Basic functions implementation + +## ops.py +* Basic operations based on tensorflow +## config.py +* Basic configuration of the model +* Every configuration can be changed here. 
+ +![VRNN Structure](graph1.png?raw=true "VRNN Structure") ![Global Structure](graph2.png?raw=true "Global Structure") diff --git a/cell.py b/cell.py new file mode 100644 index 0000000..f571847 --- /dev/null +++ b/cell.py @@ -0,0 +1,69 @@ +from ops import fc_layer, get_shape, print_vars +import tensorflow as tf +import numpy as np + +class VRNNCell(tf.nn.rnn_cell.RNNCell): + """Variational RNN cell.""" + + def __init__(self, x_dim, h_dim, z_dim = 100): + ''' + Args: + x_dim - chunk_samples + h_dim - rnn_size + z_dim - latent_size + ''' + self.n_h = h_dim + self.n_x = x_dim + self.n_z = z_dim + self.n_x_1 = x_dim + self.n_z_1 = z_dim + self.n_enc_hidden = z_dim + self.n_dec_hidden = x_dim + self.n_prior_hidden = z_dim + self.lstm = tf.nn.rnn_cell.LSTMCell(self.n_h, state_is_tuple=True) + + @property + def state_size(self): + return (self.n_h, self.n_h) + + @property + def output_size(self): + return self.n_h + + def __call__(self, x, state, scope=None): + ''' + Args: + x - input 2D tensor [batch_size x 2*self.chunk_samples] + state - tuple + (hidden, cell_state) + scope - string + defaults to be None + ''' + with tf.variable_scope(scope or type(self).__name__): + h, c = state + with tf.variable_scope("Prior"): + prior_hidden = fc_layer(h, self.n_prior_hidden, activation = tf.nn.relu, scope = "hidden") + prior_mu = fc_layer(prior_hidden, self.n_z, scope = "mu") + prior_sigma = fc_layer(prior_hidden, self.n_z, activation = tf.nn.softplus, scope = "sigma")# >=0 + + x_1 = fc_layer(x, self.n_x_1, activation = tf.nn.relu, scope = "phi_x")# >=0 + + with tf.variable_scope("Encoder"): + enc_hidden = fc_layer(tf.concat(values=(x_1, h), axis=1), self.n_enc_hidden, activation = tf.nn.relu, scope = "hidden") + enc_mu = fc_layer(enc_hidden, self.n_z, scope = 'mu') + enc_sigma = fc_layer(enc_hidden, self.n_z, activation = tf.nn.softplus, scope = 'sigma') + + # Random sampling ~ N(0, 1) + eps = tf.random_normal((get_shape(x)[0], self.n_z), 0.0, 1.0, dtype=tf.float32) + # z = mu + 
sigma*epsilon, latent variable from reparametrization trick + z = tf.add(enc_mu, tf.multiply(enc_sigma, eps)) + z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z") + + with tf.variable_scope("Decoder"): + dec_hidden = fc_layer(tf.concat(values=(z_1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") + dec_mu = fc_layer(dec_hidden, self.n_x, scope = "mu") + dec_sigma = fc_layer(dec_hidden, self.n_x, activation = tf.nn.softplus, scope = "sigma") + + output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state) + + return (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma), next_state diff --git a/config.py b/config.py new file mode 100644 index 0000000..f2a4d8e --- /dev/null +++ b/config.py @@ -0,0 +1,22 @@ +#=================================PATH=========================# + +SAVE_DIR = './save/' + +#======================VRNN configuration=======================# + +class VRNNConfig(object): + def __init__(self): + self.rnn_size = 3 # num of hidden states in RNN + self.latent_size = 3 # size of latent space + + self.seq_length = 100 # RNN sequence length + self.chunk_samples = 1 # number of samples per mdct chunk + + self.num_epochs = 5 + self.batch_size = 3000 + self.n_batches = 100 + self.log_every = 20 + + self.grad_clip = 10 # clip gradients at this value + self.decay_rate = 1. 
+ self.lr = 0.0005 # initial learning_rate diff --git a/main.py b/main.py new file mode 100644 index 0000000..5216979 --- /dev/null +++ b/main.py @@ -0,0 +1,249 @@ +from utils import create_dir, pickle_save +from config import SAVE_DIR, VRNNConfig +from datetime import datetime +from ops import print_vars +from cell import VRNNCell + +import tensorflow as tf +import numpy as np +import logging +import pickle +import os + +logging.basicConfig(format = "[%(asctime)s] %(message)s", datefmt="%m%d %H:%M:%S") +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +class VRNN(VRNNConfig): + def __init__(self, istest=False): + VRNNConfig.__init__(self) + logger.info("Building model starts...") + def NLL(y, mu, sigma): + '''Negative LogLiklihood + - log(1/sqrt(2*pi)e-(y-mu)^2/2/sigma^2) + = + 1/2*(log(2*pi)+ (y-mu)^2/2/sigma^2) + Args : + y - [batch_size x seq_length, 2*chunk_samples] + mu - [batch_size x seq_length, chunk_samples] + sigma - [batch_size x seq_length, chunk_samples] + return + NLL + ''' + with tf.variable_scope('NLL'): + sigma_square = tf.maximum(1e-10, tf.square(sigma)) # sigma^2, avoid to be zero + norm = tf.subtract(y[:,:self.chunk_samples], mu) # x-\mu + z = tf.div(tf.square(norm), sigma_square) # (x-\mu)^2/sigma^2 + denom_log = tf.log(2*np.pi*sigma_square) + return 0.5*tf.reduce_sum(z+denom_log, 1) + + def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): + ''' + Kullback leibler divergence for two gaussian distributions + ''' + with tf.variable_scope("kl_gaussisan"): + return tf.reduce_sum(0.5 * ( + 2 * tf.log(tf.maximum(1e-9, sigma_2),name='log_sigma_2') + - 2 * tf.log(tf.maximum(1e-9, sigma_1),name='log_sigma_1') + + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1), 1) + + def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y): + kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence loss + likelihood_loss = NLL(y, dec_mu, dec_sigma) # Negative log 
liklihood loss + return tf.reduce_mean(kl_loss + likelihood_loss) + + if istest: + self.batch_size = 1 + self.seq_length = 1 + logger.info("Building VRNNCell starts...") + self.cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size) + logger.info("Building VRNNCell done.") + + # [batch_size, seq_length, chunk_samples*2] + self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data') + # [batch_size, seq_length, chunk_samples*2] + self.target_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name = 'target_data') + # [batch_size, rnn_size] + self.initial_state_c, self.initial_state_h = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) + + with tf.variable_scope("inputs"): + inputs = tf.transpose(self.input_data, [1, 0, 2]) # [seq_length, batch_size, 2*chunk_samples] + inputs = tf.reshape(inputs, [-1, 2*self.chunk_samples]) # [seq_length*batch_size, 2*chunk_samples] + inputs = tf.split(axis=0, num_or_size_splits=self.seq_length, value=inputs) # seq_length * [batch_size, 2*chunk_samples] + + # [batch_size* seq_length, chunk_samples*2] + self.target = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) + + outputs, last_state = tf.contrib.rnn.static_rnn(self.cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) + # outputs seq_length*tuple*[batch_size, chunk_samples] + outputs_reshape = [] + names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] + + for n, name in enumerate(names): + with tf.variable_scope(name): + x = tf.stack([o[n] for o in outputs]) # [seq_length, batch_size, chunk_samples] + x = tf.transpose(x,[1,0,2]) # [batch_size, seq_length, chunk_samples] + x = tf.reshape(x, [self.batch_size*self.seq_length, -1]) # [batch_size x seq_length, chunk_samples] + outputs_reshape.append(x) + # tuple*[batch_size x seq_length, chunk_samples] + enc_mu, enc_sigma, 
dec_mu, dec_sigma, prior_mu, prior_sigma = outputs_reshape + self.mu = dec_mu + self.sigma = dec_sigma + + self.final_state_c, self.final_state_h = last_state + self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, self.target) + + print_vars("trainable_variables") + self.lr = tf.Variable(self.lr, trainable = False) + self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cost) + logger.info("Building model done.") + + self.sess = tf.Session() + + def next_batch(self): + ''' + 3D signal + [batch_axis, time_axis, chunk_axis] + = common noise + noise + sin(time_axis[:] + time_offset) + + half of the chunk_axis are all zeros + + Return: + x, y + x - 3D ndarray + [self.batch_size, self.seq_length, 2*self.chunk_samples] + y - 3D ndarray + [self.batch_size, self.seq_length, 2*self.chunk_samples] + + ''' + t_offset = np.random.randn(self.batch_size, 1, (2 * self.chunk_samples)) + mixed_noise = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.01 + + x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + + y[:, :, self.chunk_samples:] = 0. + x[:, :, self.chunk_samples:] = 0. 
+ return x, y + + def initialize(self): + logger.info("Initialization of parameters") + self.sess.run(tf.global_variables_initializer()) + + def restore(self): + saver = tf.train.Saver(tf.global_variables()) + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) + print("Load the model from {}".format(ckpt.model_checkpoint_path)) + saver.restore(self.sess, ckpt.model_checkpoint_path) + + def train(self): + create_dir(SAVE_DIR) + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) + saver = tf.train.Saver(tf.global_variables()) + + if ckpt: + saver.restore(self.sess, ckpt.model_checkpoint_path) + print("Load the model from %s"%ckpt.model_checkpoint_path) + + iteration = 0 + for epoch in range(self.num_epochs): + # Learning rate decay + self.sess.run(tf.assign(self.lr, self.lr * (self.decay_rate ** epoch))) + + for batch in range(self.n_batches): + x, y = self.next_batch() + feed_dict = {model.input_data: x, model.target_data: y} + train_loss, _, sigma= self.sess.run([self.cost, self.train_op, self.sigma], feed_dict = feed_dict) + + iteration+=1 + if iteration % self.log_every == 0 and iteration > 0: + print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(iteration, self.num_epochs * self.n_batches, epoch+1, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt') + saver.save(self.sess, checkpoint_path, global_step=iteration) + logger.info("model saved to {}".format(checkpoint_path)) + + + def sample(self, num=4410, start=None): + ''' + Args : + num - int + 4410 + start - sequence + None => generate [1, 1, 2*self.chunk_samples] + start.shape==1 => generate [1, 1, 2*self.chunk_samples] + start.shape==2 [seq, 2*self.chunk_samples] + => generate( + Return : + chunks - + mus - + sigmas - + ''' + def sample_gaussian(mu, sigma): + return mu + (sigma*np.random.randn(*sigma.shape)) + + # Initial condition + prev_state = self.sess.run(self.cell.zero_state(1, tf.float32)) # [batch_size, rnn_size] + + if start 
is None: + prev_x = np.random.randn(1, 1, 2*self.chunk_samples) + elif len(start.shape) == 1: + prev_x = start[np.newaxis,np.newaxis,:] + elif len(start.shape) == 2: + for i in range(start.shape[0]-1): + prev_x = start[i,:] # [2*self.chunk_samples] + prev_x = prev_x[np.newaxis,np.newaxis,:] #[1, 1, 2*self.chunk_samples] + + feed_dict = { + self.input_data : prev_x, + self.initial_state_c : prev_state[0], + self.initial_state_h : prev_state[1] + } + + [prev_state_c, prev_state_h] = self.sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict=feed_dict + ) + prev_state = prev_state_c, prev_state_h + + prev_x = start[-1,:] # [2*self.chunk_samples] + prev_x = prev_x[np.newaxis,np.newaxis,:] # [1,1,2*self.chunk_samples] + + chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32) + mus = np.zeros((num, self.chunk_samples), dtype=np.float32) + sigmas = np.zeros((num, self.chunk_samples), dtype=np.float32) + + for i in range(num): + feed_dict = { + self.input_data : prev_x, + self.initial_state_c : prev_state[0], + self.initial_state_h : prev_state[1] + } + + [o_mu, o_sigma, next_state_c, next_state_h] = self.sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict = feed_dict + ) + next_x = np.hstack( + ( + sample_gaussian(o_mu, o_sigma), np.zeros((1, self.chunk_samples)) + ) + ) # [1, 2*self.chunk_samples] + chunks[i] = next_x + mus[i] = o_mu + sigmas[i] = o_sigma + + prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32) + prev_x[0] = next_x + prev_state = next_state_c, next_state_h + + return chunks, mus, sigmas + +if __name__ == '__main__': + model = VRNN() + model.initialize() + model.train() + ''' + Test code + model2 = VRNN(True) + model2.restore() + print(model2.sample()) + ''' diff --git a/model_vrnn.py b/model_vrnn.py deleted file mode 100644 index e3986fa..0000000 --- a/model_vrnn.py +++ /dev/null @@ -1,229 +0,0 @@ -import tensorflow as tf -import numpy as np - -def linear(input_, 
output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): - shape = input_.get_shape().as_list() - - with tf.variable_scope(scope or "Linear"): - matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32, - tf.random_normal_initializer(stddev=stddev)) - bias = tf.get_variable("bias", [output_size], - initializer=tf.constant_initializer(bias_start)) - if with_w: - return tf.matmul(input_, matrix) + bias, matrix, bias - else: - return tf.matmul(input_, matrix) + bias - -class VartiationalRNNCell(tf.contrib.rnn.RNNCell): - """Variational RNN cell.""" - - def __init__(self, x_dim, h_dim, z_dim = 100): - self.n_h = h_dim - self.n_x = x_dim - self.n_z = z_dim - self.n_x_1 = x_dim - self.n_z_1 = z_dim - self.n_enc_hidden = z_dim - self.n_dec_hidden = x_dim - self.n_prior_hidden = z_dim - self.lstm = tf.contrib.rnn.LSTMCell(self.n_h, state_is_tuple=True) - - - @property - def state_size(self): - return (self.n_h, self.n_h) - - @property - def output_size(self): - return self.n_h - - def __call__(self, x, state, scope=None): - with tf.variable_scope(scope or type(self).__name__): - h, c = state - - with tf.variable_scope("Prior"): - with tf.variable_scope("hidden"): - prior_hidden = tf.nn.relu(linear(h, self.n_prior_hidden)) - with tf.variable_scope("mu"): - prior_mu = linear(prior_hidden, self.n_z) - with tf.variable_scope("sigma"): - prior_sigma = tf.nn.softplus(linear(prior_hidden, self.n_z)) - - with tf.variable_scope("phi_x"): - x_1 = tf.nn.relu(linear(x, self.n_x_1)) - - with tf.variable_scope("Encoder"): - with tf.variable_scope("hidden"): - enc_hidden = tf.nn.relu(linear(tf.concat(axis=1,values=(x_1, h)), self.n_enc_hidden)) - with tf.variable_scope("mu"): - enc_mu = linear(enc_hidden, self.n_z) - with tf.variable_scope("sigma"): - enc_sigma = tf.nn.softplus(linear(enc_hidden, self.n_z)) - eps = tf.random_normal((x.get_shape().as_list()[0], self.n_z), 0.0, 1.0, dtype=tf.float32) - # z = mu + sigma*epsilon - z = tf.add(enc_mu, 
tf.multiply(enc_sigma, eps)) - with tf.variable_scope("phi_z"): - z_1 = tf.nn.relu(linear(z, self.n_z_1)) - - with tf.variable_scope("Decoder"): - with tf.variable_scope("hidden"): - dec_hidden = tf.nn.relu(linear(tf.concat(axis=1,values=(z_1, h)), self.n_dec_hidden)) - with tf.variable_scope("mu"): - dec_mu = linear(dec_hidden, self.n_x) - with tf.variable_scope("sigma"): - dec_sigma = tf.nn.softplus(linear(dec_hidden, self.n_x)) - with tf.variable_scope("rho"): - dec_rho = tf.nn.sigmoid(linear(dec_hidden, self.n_x)) - - - output, state2 = self.lstm(tf.concat(axis=1,values=(x_1, z_1)), state) - return (enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma), state2 - - - - -class VRNN(): - def __init__(self, args, sample=False): - - def tf_normal(y, mu, s, rho): - with tf.variable_scope('normal'): - ss = tf.maximum(1e-10,tf.square(s)) - norm = tf.subtract(y[:,:args.chunk_samples], mu) - z = tf.div(tf.square(norm), ss) - denom_log = tf.log(2*np.pi*ss, name='denom_log') - result = tf.reduce_sum(z+denom_log, 1)/2# - - #(tf.log(tf.maximum(1e-20,rho),name='log_rho')*(1+y[:,args.chunk_samples:]) - # +tf.log(tf.maximum(1e-20,1-rho),name='log_rho_inv')*(1-y[:,args.chunk_samples:]))/2, 1) - - return result - - def tf_kl_gaussgauss(mu_1, sigma_1, mu_2, sigma_2): - with tf.variable_scope("kl_gaussgauss"): - return tf.reduce_sum(0.5 * ( - 2 * tf.log(tf.maximum(1e-9,sigma_2),name='log_sigma_2') - - 2 * tf.log(tf.maximum(1e-9,sigma_1),name='log_sigma_1') - + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1 - ), 1) - - def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma, y): - kl_loss = tf_kl_gaussgauss(enc_mu, enc_sigma, prior_mu, prior_sigma) - likelihood_loss = tf_normal(y, dec_mu, dec_sigma, dec_rho) - - return tf.reduce_mean(kl_loss + likelihood_loss) - #return tf.reduce_mean(likelihood_loss) - - self.args = args - if sample: - args.batch_size = 1 - args.seq_length = 1 - - cell = 
VartiationalRNNCell(args.chunk_samples, args.rnn_size, args.latent_size) - - self.cell = cell - - self.input_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name='input_data') - self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples],name = 'target_data') - self.initial_state_c, self.initial_state_h = cell.zero_state(batch_size=args.batch_size, dtype=tf.float32) - - - # input shape: (batch_size, n_steps, n_input) - with tf.variable_scope("inputs"): - inputs = tf.transpose(self.input_data, [1, 0, 2]) # permute n_steps and batch_size - inputs = tf.reshape(inputs, [-1, 2*args.chunk_samples]) # (n_steps*batch_size, n_input) - - # Split data because rnn cell needs a list of inputs for the RNN inner loop - inputs = tf.split(axis=0, num_or_size_splits=args.seq_length, value=inputs) # n_steps * (batch_size, n_hidden) - flat_target_data = tf.reshape(self.target_data,[-1, 2*args.chunk_samples]) - - self.target = flat_target_data - self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs),[1,0,2]),[args.batch_size*args.seq_length, -1]) - self.input = tf.stack(inputs) - # Get vrnn cell output - outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c,self.initial_state_h)) - #print outputs - #outputs = map(tf.pack,zip(*outputs)) - outputs_reshape = [] - names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "dec_rho", "prior_mu", "prior_sigma"] - for n,name in enumerate(names): - with tf.variable_scope(name): - x = tf.stack([o[n] for o in outputs]) - x = tf.transpose(x,[1,0,2]) - x = tf.reshape(x,[args.batch_size*args.seq_length, -1]) - outputs_reshape.append(x) - - enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma = outputs_reshape - self.final_state_c,self.final_state_h = last_state - self.mu = dec_mu - self.sigma = dec_sigma - self.rho = dec_rho - - lossfunc = get_lossfunc(enc_mu, enc_sigma, dec_mu, 
dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) - self.sigma = dec_sigma - self.mu = dec_mu - with tf.variable_scope('cost'): - self.cost = lossfunc - tf.summary.scalar('cost', self.cost) - tf.summary.scalar('mu', tf.reduce_mean(self.mu)) - tf.summary.scalar('sigma', tf.reduce_mean(self.sigma)) - - - self.lr = tf.Variable(0.0, trainable=False) - tvars = tf.trainable_variables() - for t in tvars: - print t.name - grads = tf.gradients(self.cost, tvars) - #grads = tf.cond( - # tf.global_norm(grads) > 1e-20, - # lambda: tf.clip_by_global_norm(grads, args.grad_clip)[0], - # lambda: grads) - optimizer = tf.train.AdamOptimizer(self.lr) - self.train_op = optimizer.apply_gradients(zip(grads, tvars)) - #self.saver = tf.train.Saver(tf.all_variables()) - - def sample(self, sess, args, num=4410, start=None): - - def sample_gaussian(mu, sigma): - return mu + (sigma*np.random.randn(*sigma.shape)) - - if start is None: - prev_x = np.random.randn(1, 1, 2*args.chunk_samples) - elif len(start.shape) == 1: - prev_x = start[np.newaxis,np.newaxis,:] - elif len(start.shape) == 2: - for i in range(start.shape[0]-1): - prev_x = start[i,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - - [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( - [self.mu, self.sigma, self.rho, - self.final_state_c,self.final_state_h],feed) - - prev_x = start[-1,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - - prev_state = sess.run(self.cell.zero_state(1, tf.float32)) - chunks = np.zeros((num, 2*args.chunk_samples), dtype=np.float32) - mus = np.zeros((num, args.chunk_samples), dtype=np.float32) - sigmas = np.zeros((num, args.chunk_samples), dtype=np.float32) - - for i in xrange(num): - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, o_rho, next_state_c, next_state_h] = sess.run([self.mu, self.sigma, - 
self.rho, self.final_state_c, self.final_state_h],feed) - - next_x = np.hstack((sample_gaussian(o_mu, o_sigma), - 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) - chunks[i] = next_x - mus[i] = o_mu - sigmas[i] = o_sigma - - prev_x = np.zeros((1, 1, 2*args.chunk_samples), dtype=np.float32) - prev_x[0][0] = next_x - prev_state = next_state_c, next_state_h - - return chunks, mus, sigmas diff --git a/ops.py b/ops.py new file mode 100644 index 0000000..87fcad4 --- /dev/null +++ b/ops.py @@ -0,0 +1,42 @@ +import tensorflow as tf + +def get_shape(tensor): + '''return the shape of tensor as list''' + return tensor.get_shape().as_list() + +def print_vars(string): + '''print variables in collection named string''' + print("Collection name %s"%string) + print(" "+"\n ".join(["{} : {}".format(v.name, get_shape(v)) for v in tf.get_collection(string)])) + +def fc_layer(input_, output_size, activation = None, batch_norm = False, istrain = False, scope = None): + ''' + fully convlolution layer + Args : + input_ - 2D tensor + general shape : [batch, input_size] + output_size - int + shape of output 2D tensor + activation - activation function + defaults to be None + batch_norm - bool + defaults to be False + if batch_norm to apply batch_normalization + istrain - bool + defaults to be False + indicator for phase train or not + scope - string + defaults to be None then scope becomes "fc" + ''' + with tf.variable_scope(scope or "fc"): + w = tf.get_variable(name="w", shape = [get_shape(input_)[1], output_size], initializer=tf.contrib.layers.xavier_initializer()) + if batch_norm: + norm = tf.contrib.layers.batch_norm(tf.matmul(input_, w) , center=True, scale=True, decay = 0.8, is_training=istrain, scope='batch_norm') + if activation is None: + return norm + return activation(norm) + else: + b = tf.get_variable(name="b", shape = [output_size], initializer=tf.constant_initializer(0.01)) + if activation is None: + return tf.nn.xw_plus_b(input_, w, b) + return 
activation(tf.nn.xw_plus_b(input_, w, b)) diff --git a/sample_vrnn.py b/sample_vrnn.py deleted file mode 100644 index b8e4bc2..0000000 --- a/sample_vrnn.py +++ /dev/null @@ -1,21 +0,0 @@ -import tensorflow as tf - -import os -import cPickle -from model_vrnn import VRNN -import numpy as np - -from train_vrnn import next_batch - -with open(os.path.join('save-vrnn', 'config.pkl')) as f: - saved_args = cPickle.load(f) - -model = VRNN(saved_args, True) -sess = tf.InteractiveSession() -saver = tf.train.Saver(tf.all_variables()) - -ckpt = tf.train.get_checkpoint_state('save-vrnn') -print "loading model: ",ckpt.model_checkpoint_path - -saver.restore(sess, ckpt.model_checkpoint_path) -sample_data,mus,sigmas = model.sample(sess,saved_args) diff --git a/train_vrnn.py b/train_vrnn.py deleted file mode 100644 index c79f8b7..0000000 --- a/train_vrnn.py +++ /dev/null @@ -1,110 +0,0 @@ -import numpy as np -import tensorflow as tf - -import argparse -import glob -import time -from datetime import datetime -import os -import cPickle - -from model_vrnn import VRNN - -from matplotlib import pyplot as plt - -''' -TODOS: - - parameters for depth and width of hidden layers - - implement predict function - - separate binary and gaussian variables - - clean up nomenclature to remove MDCT references - - implement separate MDCT training and sampling version -''' - -def next_batch(args): - t0 = np.random.randn(args.batch_size, 1, (2 * args.chunk_samples)) - mixed_noise = np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - #x = t0 + mixed_noise + np.random.randn( - # args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - #y = t0 + mixed_noise + np.random.randn( - # args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - x = np.sin(2 * np.pi * (np.arange(args.seq_length)[np.newaxis, :, np.newaxis] / 10. 
+ t0)) + np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + mixed_noise*0.1 - y = np.sin(2 * np.pi * (np.arange(1, args.seq_length + 1)[np.newaxis, :, np.newaxis] / 10. + t0)) + np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + mixed_noise*0.1 - - y[:, :, args.chunk_samples:] = 0. - x[:, :, args.chunk_samples:] = 0. - return x, y - - -def train(args, model): - dirname = 'save-vrnn' - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(os.path.join(dirname, 'config.pkl'), 'w') as f: - cPickle.dump(args, f) - - ckpt = tf.train.get_checkpoint_state(dirname) - n_batches = 100 - with tf.Session() as sess: - summary_writer = tf.summary.FileWriter('logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph) - check = tf.add_check_numerics_ops() - merged = tf.summary.merge_all() - tf.global_variables_initializer().run() - saver = tf.train.Saver(tf.global_variables()) - if ckpt: - saver.restore(sess, ckpt.model_checkpoint_path) - print "Loaded model" - start = time.time() - for e in xrange(args.num_epochs): - sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) - state = model.initial_state_c, model.initial_state_h - for b in xrange(n_batches): - x, y = next_batch(args) - feed = {model.input_data: x, model.target_data: y} - train_loss, _, cr, summary, sigma, mu, input, target= sess.run( - [model.cost, model.train_op, check, merged, model.sigma, model.mu, model.flat_input, model.target], - feed) - summary_writer.add_summary(summary, e * n_batches + b) - if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): - checkpoint_path = os.path.join(dirname, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=e * n_batches + b) - print "model saved to {}".format(checkpoint_path) - end = time.time() - print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \ - .format(e * n_batches + b, - args.num_epochs * 
n_batches, - e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0)) - start = time.time() - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--rnn_size', type=int, default=3, - help='size of RNN hidden state') - parser.add_argument('--latent_size', type=int, default=3, - help='size of latent space') - parser.add_argument('--batch_size', type=int, default=3000, - help='minibatch size') - parser.add_argument('--seq_length', type=int, default=100, - help='RNN sequence length') - parser.add_argument('--num_epochs', type=int, default=100, - help='number of epochs') - parser.add_argument('--save_every', type=int, default=500, - help='save frequency') - parser.add_argument('--grad_clip', type=float, default=10., - help='clip gradients at this value') - parser.add_argument('--learning_rate', type=float, default=0.0005, - help='learning rate') - parser.add_argument('--decay_rate', type=float, default=1., - help='decay of learning rate') - parser.add_argument('--chunk_samples', type=int, default=1, - help='number of samples per mdct chunk') - args = parser.parse_args() - - model = VRNN(args) - - train(args, model) diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..64e46af --- /dev/null +++ b/utils.py @@ -0,0 +1,17 @@ +import os +import pickle + +def create_dir(dirname): + if not os.path.exists(dirname): + os.makedirs(dirname) + +def pickle_load(path): + '''Load the picke data from path''' + with open(path, 'rb') as f: + loaded_pickle = pickle.load(f) + return loaded_pickle + +def pickle_save(content, path): + '''Save the content on the path''' + with open(path, 'wb') as f: + pickle.dump(content, f)