From c1897f8a66f5b6c5d913a7c2e66cdcc6c9e1987c Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Tue, 25 Jul 2017 13:08:19 +0900 Subject: [PATCH 01/10] Change model structure --- README.md | 14 ++++++- config.py | 3 ++ model_vrnn.py => model.py | 0 train_vrnn.py => train.py | 81 +++++++++++++++------------------------ utils.py | 17 ++++++++ 5 files changed, 64 insertions(+), 51 deletions(-) create mode 100644 config.py rename model_vrnn.py => model.py (100%) rename train_vrnn.py => train.py (51%) create mode 100644 utils.py diff --git a/README.md b/README.md index a5e8b3b..a10eb6f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,19 @@ # tensorflow-vrnn A variational recurrent neural network as described in: -Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988). +[Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 
2980-2988).](https://arxiv.org/abs/1506.02216) + +## train.py +* train this model + +## model.py +* **VRNN** structure + +## utils.py +* Basic functions implementation + +## config.py +* Basic configuration of model ![VRNN Structure](graph1.png?raw=true "VRNN Structure") diff --git a/config.py b/config.py new file mode 100644 index 0000000..3621ed5 --- /dev/null +++ b/config.py @@ -0,0 +1,3 @@ +#=================================PATH=========================# + +SAVE_DIR = './save-vrnn/' \ No newline at end of file diff --git a/model_vrnn.py b/model.py similarity index 100% rename from model_vrnn.py rename to model.py diff --git a/train_vrnn.py b/train.py similarity index 51% rename from train_vrnn.py rename to train.py index c79f8b7..de94da5 100644 --- a/train_vrnn.py +++ b/train.py @@ -1,16 +1,13 @@ +from config import SAVE_DIR +from utils import create_dir, pickle_save +from model_vrnn import VRNN +import matplotlib.pyplot as plt + import numpy as np import tensorflow as tf - -import argparse -import glob -import time from datetime import datetime import os -import cPickle - -from model_vrnn import VRNN - -from matplotlib import pyplot as plt +import pickle ''' TODOS: @@ -22,63 +19,49 @@ ''' def next_batch(args): - t0 = np.random.randn(args.batch_size, 1, (2 * args.chunk_samples)) + t_offset = np.random.randn(args.batch_size, 1, (2 * args.chunk_samples)) mixed_noise = np.random.randn( args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - #x = t0 + mixed_noise + np.random.randn( - # args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - #y = t0 + mixed_noise + np.random.randn( - # args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - x = np.sin(2 * np.pi * (np.arange(args.seq_length)[np.newaxis, :, np.newaxis] / 10. + t0)) + np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + mixed_noise*0.1 - y = np.sin(2 * np.pi * (np.arange(1, args.seq_length + 1)[np.newaxis, :, np.newaxis] / 10. 
+ t0)) + np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + mixed_noise*0.1 + x = np.random.randn(args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + + mixed_noise*0.1 + + np.sin(2 * np.pi * (np.arange(args.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + y = np.random.randn(args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 + + mixed_noise*0.1 + + np.sin(2 * np.pi * (np.arange(1, args.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t0)) y[:, :, args.chunk_samples:] = 0. x[:, :, args.chunk_samples:] = 0. return x, y - def train(args, model): - dirname = 'save-vrnn' - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(os.path.join(dirname, 'config.pkl'), 'w') as f: - cPickle.dump(args, f) + create_dir(SAVE_DIR) + pickle_path = os.path.join(SAVE_DIR, 'config.pkl') + pickle_save(args, pickle_path) - ckpt = tf.train.get_checkpoint_state(dirname) + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) n_batches = 100 - with tf.Session() as sess: - summary_writer = tf.summary.FileWriter('logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph) - check = tf.add_check_numerics_ops() + with tf.Session() as sess:0 merged = tf.summary.merge_all() - tf.global_variables_initializer().run() + sess.run(tf.global_variables_initializer()) saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(sess, ckpt.model_checkpoint_path) - print "Loaded model" - start = time.time() - for e in xrange(args.num_epochs): - sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** e))) - state = model.initial_state_c, model.initial_state_h - for b in xrange(n_batches): + print("Loaded model") + + for epoch in range(args.num_epochs): + # Learning rate decay + sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** epoch))) + + for b in range(n_batches): x, y = next_batch(args) - feed = {model.input_data: x, model.target_data: y} - train_loss, _, cr, summary, 
sigma, mu, input, target= sess.run( - [model.cost, model.train_op, check, merged, model.sigma, model.mu, model.flat_input, model.target], - feed) - summary_writer.add_summary(summary, e * n_batches + b) + feed_dict = {model.input_data: x, model.target_data: y} + train_loss, _, cr, sigma= sess.run([model.cost, model.train_op, check, model.sigma], feed_dict = feed_dict) + if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): checkpoint_path = os.path.join(dirname, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=e * n_batches + b) - print "model saved to {}".format(checkpoint_path) - end = time.time() - print "{}/{} (epoch {}), train_loss = {:.6f}, time/batch = {:.1f}, std = {:.3f}" \ - .format(e * n_batches + b, - args.num_epochs * n_batches, - e, args.chunk_samples * train_loss, end - start, sigma.mean(axis=0).mean(axis=0)) - start = time.time() + print("model saved to {}".format(checkpoint_path)) + print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(e * n_batches + b, args.num_epochs * n_batches, e, args.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) if __name__ == '__main__': @@ -104,7 +87,5 @@ def train(args, model): parser.add_argument('--chunk_samples', type=int, default=1, help='number of samples per mdct chunk') args = parser.parse_args() - model = VRNN(args) - train(args, model) diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..64e46af --- /dev/null +++ b/utils.py @@ -0,0 +1,17 @@ +import os +import pickle + +def create_dir(dirname): + if not os.path.exists(dirname): + os.makedirs(dirname) + +def pickle_load(path): + '''Load the picke data from path''' + with open(path, 'rb') as f: + loaded_pickle = pickle.load(f) + return loaded_pickle + +def pickle_save(content, path): + '''Save the content on the path''' + with open(path, 'wb') as f: + pickle.dump(content, f) From aa610bd534ed6649d0cd58c8c1d1cb972736099d Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Tue, 25 Jul 2017 
13:09:32 +0900 Subject: [PATCH 02/10] Add requirements on README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index a10eb6f..48db928 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,10 @@ A variational recurrent neural network as described in: [Chung, J., Kastner, K., Dinh, L., Goel, K., Courville, A. C., & Bengio, Y. (2015). A recurrent latent variable model for sequential data. In Advances in neural information processing systems (pp. 2980-2988).](https://arxiv.org/abs/1506.02216) +## Requirements +python == 3.5 +tensorflow == 1.2.1L + ## train.py * train this model From 1ca485d2395fe95e53fa451c36edda7197dea4a5 Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Tue, 25 Jul 2017 17:47:02 +0900 Subject: [PATCH 03/10] Split the operations to op.py --- model.py | 151 ++++++++++++++++++------------------------------- ops.py | 42 ++++++++++++++ sample_vrnn.py | 30 +++++----- train.py | 2 +- utils.py | 2 +- 5 files changed, 112 insertions(+), 115 deletions(-) create mode 100644 ops.py diff --git a/model.py b/model.py index e3986fa..cd803ce 100644 --- a/model.py +++ b/model.py @@ -1,20 +1,8 @@ +from ops import fc_layer, get_shape, print_vars import tensorflow as tf import numpy as np -def linear(input_, output_size, scope=None, stddev=0.02, bias_start=0.0, with_w=False): - shape = input_.get_shape().as_list() - - with tf.variable_scope(scope or "Linear"): - matrix = tf.get_variable("Matrix", [shape[1], output_size], tf.float32, - tf.random_normal_initializer(stddev=stddev)) - bias = tf.get_variable("bias", [output_size], - initializer=tf.constant_initializer(bias_start)) - if with_w: - return tf.matmul(input_, matrix) + bias, matrix, bias - else: - return tf.matmul(input_, matrix) + bias - -class VartiationalRNNCell(tf.contrib.rnn.RNNCell): +class VartiationalRNNCell(tf.nn.rnn_cell.RNNCell): """Variational RNN cell.""" def __init__(self, x_dim, h_dim, z_dim = 100): @@ -26,8 +14,7 @@ def __init__(self, x_dim, h_dim, z_dim = 
100): self.n_enc_hidden = z_dim self.n_dec_hidden = x_dim self.n_prior_hidden = z_dim - self.lstm = tf.contrib.rnn.LSTMCell(self.n_h, state_is_tuple=True) - + self.lstm = tf.nn.rnn_cell.LSTMCell(self.n_h, state_is_tuple=True) @property def state_size(self): @@ -38,82 +25,70 @@ def output_size(self): return self.n_h def __call__(self, x, state, scope=None): + ''' + Args: + x - input 2D tensor + state - tuple + (hidden, cell_state) + scope - string + defaults to be None + ''' with tf.variable_scope(scope or type(self).__name__): h, c = state with tf.variable_scope("Prior"): - with tf.variable_scope("hidden"): - prior_hidden = tf.nn.relu(linear(h, self.n_prior_hidden)) - with tf.variable_scope("mu"): - prior_mu = linear(prior_hidden, self.n_z) - with tf.variable_scope("sigma"): - prior_sigma = tf.nn.softplus(linear(prior_hidden, self.n_z)) + prior_hidden = fc_layer(h, self.n_prior_hidden, activation = tf.nn.relu, scope = "hidden") + prior_mu = fc_layer(prior_hidden, self.n_z, scope = "mu") + prior_sigma = fc_layer(prior_hidden, self.n_z, activation = tf.nn.softplus, scope = "sigma")# >=0 - with tf.variable_scope("phi_x"): - x_1 = tf.nn.relu(linear(x, self.n_x_1)) + x_1 = fc_layer(x, self.n_x_1, activation = tf.nn.relu, scope = "phi_x")# >=0 with tf.variable_scope("Encoder"): - with tf.variable_scope("hidden"): - enc_hidden = tf.nn.relu(linear(tf.concat(axis=1,values=(x_1, h)), self.n_enc_hidden)) - with tf.variable_scope("mu"): - enc_mu = linear(enc_hidden, self.n_z) - with tf.variable_scope("sigma"): - enc_sigma = tf.nn.softplus(linear(enc_hidden, self.n_z)) - eps = tf.random_normal((x.get_shape().as_list()[0], self.n_z), 0.0, 1.0, dtype=tf.float32) - # z = mu + sigma*epsilon + enc_hidden = fc_layer(tf.concat(values=(x_1, h), axis=1), self.n_enc_hidden, activation = tf.nn.relu, scope = "hidden") + enc_mu = fc_layer(enc_hidden, self.n_z, scope = 'mu') + enc_sigma = fc_layer(enc_hidden, self.n_z, activation = tf.nn.softplus, scope = 'sigma') + + # Random sampling ~ 
N(0, 1) + eps = tf.random_normal((get_shape(x)[0], self.n_z), 0.0, 1.0, dtype=tf.float32) + # z = mu + sigma*epsilon, latent variable from reparametrization trick z = tf.add(enc_mu, tf.multiply(enc_sigma, eps)) - with tf.variable_scope("phi_z"): - z_1 = tf.nn.relu(linear(z, self.n_z_1)) + z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z") with tf.variable_scope("Decoder"): - with tf.variable_scope("hidden"): - dec_hidden = tf.nn.relu(linear(tf.concat(axis=1,values=(z_1, h)), self.n_dec_hidden)) - with tf.variable_scope("mu"): - dec_mu = linear(dec_hidden, self.n_x) - with tf.variable_scope("sigma"): - dec_sigma = tf.nn.softplus(linear(dec_hidden, self.n_x)) - with tf.variable_scope("rho"): - dec_rho = tf.nn.sigmoid(linear(dec_hidden, self.n_x)) - - - output, state2 = self.lstm(tf.concat(axis=1,values=(x_1, z_1)), state) - return (enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma), state2 - - + dec_hidden = fc_layer(tf.concat(values=(z1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") + dec_mu = fc_layer(dec_hidden, self.n_x, scope = "mu") + dec_sigma = fc_layer(dec_hidden, self.n_x, scope = "sigma") + output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state) + return (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma), next_state class VRNN(): - def __init__(self, args, sample=False): - - def tf_normal(y, mu, s, rho): + def __init__(self, args, istest=False): + def tf_normal(y, mu, sigma): with tf.variable_scope('normal'): - ss = tf.maximum(1e-10,tf.square(s)) + sigma_square = tf.maximum(1e-10, tf.square(sigma)) norm = tf.subtract(y[:,:args.chunk_samples], mu) - z = tf.div(tf.square(norm), ss) + z = tf.div(tf.square(norm), sigma_square) denom_log = tf.log(2*np.pi*ss, name='denom_log') - result = tf.reduce_sum(z+denom_log, 1)/2# - - #(tf.log(tf.maximum(1e-20,rho),name='log_rho')*(1+y[:,args.chunk_samples:]) - # 
+tf.log(tf.maximum(1e-20,1-rho),name='log_rho_inv')*(1-y[:,args.chunk_samples:]))/2, 1) - + result = tf.reduce_sum(z+denom_log, 1)/2# return result - def tf_kl_gaussgauss(mu_1, sigma_1, mu_2, sigma_2): - with tf.variable_scope("kl_gaussgauss"): + def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): + with tf.variable_scope("kl_gaussisan"): return tf.reduce_sum(0.5 * ( - 2 * tf.log(tf.maximum(1e-9,sigma_2),name='log_sigma_2') - - 2 * tf.log(tf.maximum(1e-9,sigma_1),name='log_sigma_1') + 2 * tf.log(tf.maximum(1e-9, sigma_2),name='log_sigma_2') + - 2 * tf.log(tf.maximum(1e-9, sigma_1),name='log_sigma_1') + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1 ), 1) - def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma, y): - kl_loss = tf_kl_gaussgauss(enc_mu, enc_sigma, prior_mu, prior_sigma) - likelihood_loss = tf_normal(y, dec_mu, dec_sigma, dec_rho) - + def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y): + kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence + likelihood_loss = tf_normal(y, dec_mu, dec_sigma) return tf.reduce_mean(kl_loss + likelihood_loss) - #return tf.reduce_mean(likelihood_loss) self.args = args - if sample: + + if istest: args.batch_size = 1 args.seq_length = 1 @@ -122,28 +97,27 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_ self.cell = cell self.input_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name='input_data') - self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples],name = 'target_data') - self.initial_state_c, self.initial_state_h = cell.zero_state(batch_size=args.batch_size, dtype=tf.float32) - + self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name = 'target_data') + self.initial_state_c, 
self.initial_state_h = self.cell.zero_state(batch_size=args.batch_size, dtype=tf.float32) # input shape: (batch_size, n_steps, n_input) with tf.variable_scope("inputs"): - inputs = tf.transpose(self.input_data, [1, 0, 2]) # permute n_steps and batch_size + inputs = tf.transpose(self.input_data, [1, 0, 2]) # [n_steps, batch_size, n_input] inputs = tf.reshape(inputs, [-1, 2*args.chunk_samples]) # (n_steps*batch_size, n_input) # Split data because rnn cell needs a list of inputs for the RNN inner loop inputs = tf.split(axis=0, num_or_size_splits=args.seq_length, value=inputs) # n_steps * (batch_size, n_hidden) - flat_target_data = tf.reshape(self.target_data,[-1, 2*args.chunk_samples]) + flat_target_data = tf.reshape(self.target_data, [-1, 2*args.chunk_samples]) self.target = flat_target_data - self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs),[1,0,2]),[args.batch_size*args.seq_length, -1]) + self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs), [1,0,2]), [args.batch_size*args.seq_length, -1]) self.input = tf.stack(inputs) # Get vrnn cell output - outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c,self.initial_state_h)) + outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) #print outputs #outputs = map(tf.pack,zip(*outputs)) outputs_reshape = [] - names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "dec_rho", "prior_mu", "prior_sigma"] + names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] for n,name in enumerate(names): with tf.variable_scope(name): x = tf.stack([o[n] for o in outputs]) @@ -151,34 +125,17 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_ x = tf.reshape(x,[args.batch_size*args.seq_length, -1]) outputs_reshape.append(x) - enc_mu, enc_sigma, dec_mu, dec_sigma, dec_rho, prior_mu, prior_sigma = outputs_reshape + enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, 
prior_sigma = outputs_reshape self.final_state_c,self.final_state_h = last_state self.mu = dec_mu self.sigma = dec_sigma - self.rho = dec_rho - lossfunc = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) + self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) self.sigma = dec_sigma self.mu = dec_mu - with tf.variable_scope('cost'): - self.cost = lossfunc - tf.summary.scalar('cost', self.cost) - tf.summary.scalar('mu', tf.reduce_mean(self.mu)) - tf.summary.scalar('sigma', tf.reduce_mean(self.sigma)) - - - self.lr = tf.Variable(0.0, trainable=False) - tvars = tf.trainable_variables() - for t in tvars: - print t.name - grads = tf.gradients(self.cost, tvars) - #grads = tf.cond( - # tf.global_norm(grads) > 1e-20, - # lambda: tf.clip_by_global_norm(grads, args.grad_clip)[0], - # lambda: grads) - optimizer = tf.train.AdamOptimizer(self.lr) - self.train_op = optimizer.apply_gradients(zip(grads, tvars)) - #self.saver = tf.train.Saver(tf.all_variables()) + + print_vars("trainable_variables") + self.train_op = tf.train.AdamOptimizer().minimize(self.cost) def sample(self, sess, args, num=4410, start=None): diff --git a/ops.py b/ops.py new file mode 100644 index 0000000..e4b672f --- /dev/null +++ b/ops.py @@ -0,0 +1,42 @@ +import tensorflow as tf + +def print_vars(string): + '''print variables in collection named string''' + print("Collection name %s"%string) + print([v.name for v in tf.get_collection(string)]) + +def get_shape(tensor): + '''return the shape of tensor as list''' + return tensor.get_shape().as_list() + +def fc_layer(input_, output_size, activation = None, batch_norm = False, istrain = False, scope = None): + ''' + fully convlolution layer + Args : + input_ - 2D tensor + general shape : [batch, input_size] + output_size - int + shape of output 2D tensor + activation - activation function + defaults to be None + batch_norm - bool + defaults to be 
False + if batch_norm to apply batch_normalization + istrain - bool + defaults to be False + indicator for phase train or not + scope - string + defaults to be None then scope becomes "fc" + ''' + with tf.variable_scope(scope or "fc"): + w = tf.get_variable(name="w", shape = [get_shape(input_)[1], output_size], initializer=tf.contrib.layers.xavier_initializer()) + if batch_norm: + norm = tf.contrib.layers.batch_norm(tf.matmul(input_, w) , center=True, scale=True, decay = 0.8, is_training=istrain, scope='batch_norm') + if activation is None: + return norm + return activation(norm) + else: + b = tf.get_variable(name="b", shape = [output_size], initializer=tf.constant_initializer(0.01)) + if activation is None: + return tf.nn.xw_plus_b(input_, w, b) + return activation(tf.nn.xw_plus_b(input_, w, b)) diff --git a/sample_vrnn.py b/sample_vrnn.py index b8e4bc2..660fcee 100644 --- a/sample_vrnn.py +++ b/sample_vrnn.py @@ -1,21 +1,19 @@ +from train import next_batch +from utils import pickle_load +from model import VRNN +from config import SAVE_DIR import tensorflow as tf - -import os -import cPickle -from model_vrnn import VRNN import numpy as np +import pickle +import os -from train_vrnn import next_batch - -with open(os.path.join('save-vrnn', 'config.pkl')) as f: - saved_args = cPickle.load(f) - -model = VRNN(saved_args, True) -sess = tf.InteractiveSession() -saver = tf.train.Saver(tf.all_variables()) - -ckpt = tf.train.get_checkpoint_state('save-vrnn') -print "loading model: ",ckpt.model_checkpoint_path +load_path = os.path.join(SAVE_DIR, 'config.pkl') +loaded_args = pickle_load(load_path) +model = VRNN(loaded_args, True) +sess = tf.Session() +saver = tf.train.Saver(tf.global_variables()) +ckpt = tf.train.get_checkpoint_state(SAVE_DIR) +print("loading model: ", ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) -sample_data,mus,sigmas = model.sample(sess,saved_args) +sample_data,mus,sigmas = model.sample(sess, loaded_args) \ No newline at end 
of file diff --git a/train.py b/train.py index de94da5..f32bbf4 100644 --- a/train.py +++ b/train.py @@ -40,7 +40,7 @@ def train(args, model): ckpt = tf.train.get_checkpoint_state(SAVE_DIR) n_batches = 100 - with tf.Session() as sess:0 + with tf.Session() as sess: merged = tf.summary.merge_all() sess.run(tf.global_variables_initializer()) saver = tf.train.Saver(tf.global_variables()) diff --git a/utils.py b/utils.py index 64e46af..a7fe53e 100644 --- a/utils.py +++ b/utils.py @@ -14,4 +14,4 @@ def pickle_load(path): def pickle_save(content, path): '''Save the content on the path''' with open(path, 'wb') as f: - pickle.dump(content, f) + pickle.dump(content, f) \ No newline at end of file From ce37cc230bd41e1f58b56b33f90f6b888f9190e1 Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Wed, 26 Jul 2017 14:56:40 +0900 Subject: [PATCH 04/10] Restructure whole code --- config.py | 21 ++++- main.py | 186 ++++++++++++++++++++++++++++++++++++++ model.py | 129 +------------------------- sample_vrnn.py => test.py | 0 train.py | 91 ------------------- 5 files changed, 209 insertions(+), 218 deletions(-) create mode 100644 main.py rename sample_vrnn.py => test.py (100%) delete mode 100644 train.py diff --git a/config.py b/config.py index 3621ed5..5e32ee7 100644 --- a/config.py +++ b/config.py @@ -1,3 +1,22 @@ #=================================PATH=========================# -SAVE_DIR = './save-vrnn/' \ No newline at end of file +SAVE_DIR = './save/' + +#======================VRNN configuration=======================# + +class VRNNConfig(object): + def __init__(self): + self.rnn_size = 3 # num of hidden states in RNN + self.latent_size = 3 # size of latent space + + self.seq_length = 100 # RNN sequence length + self.chunk_samples = 1 # number of sample os per mdct chunk + + self.num_epochs = 100 + self.batch_size = 3000 + self.n_batches = 100 + self.save_every = 500 + + self.grad_clip = 10 # clip gradients at this value + self.decay_rate = 1. 
+ self.learning_rate = 0.0005 diff --git a/main.py b/main.py new file mode 100644 index 0000000..95291e7 --- /dev/null +++ b/main.py @@ -0,0 +1,186 @@ +from utils import create_dir, pickle_save +from config import SAVE_DIR, VRNNConfig +from datetime import datetime +from model import VRNNCell + +import matplotlib.pyplot as plt +import tensorflow as tf +import numpy as np +import logging +import pickle +import os + +logging.basicConfig(format = "[%(asctime)s] %(message)s", datefmt="%m%d %H:%M:%S") +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +class VRNN(VRNNConfig): + def __init__(self, istest=False): + VRNNConfig.__init__(self) + + def tf_normal(y, mu, sigma): + with tf.variable_scope('normal'): + sigma_square = tf.maximum(1e-10, tf.square(sigma)) + norm = tf.subtract(y[:,:args.chunk_samples], mu) + z = tf.div(tf.square(norm), sigma_square) + denom_log = tf.log(2*np.pi*ss, name='denom_log') + result = tf.reduce_sum(z+denom_log, 1)/2# + return result + + def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): + with tf.variable_scope("kl_gaussisan"): + return tf.reduce_sum(0.5 * ( + 2 * tf.log(tf.maximum(1e-9, sigma_2),name='log_sigma_2') + - 2 * tf.log(tf.maximum(1e-9, sigma_1),name='log_sigma_1') + + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1 + ), 1) + + def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y): + kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence + likelihood_loss = tf_normal(y, dec_mu, dec_sigma) + return tf.reduce_mean(kl_loss + likelihood_loss) + + if istest: + self.batch_size = 1 + self.seq_length = 1 + + cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size) + + self.cell = cell + + self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data') + self.target_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 
2*self.chunk_samples], name = 'target_data') + self.initial_state_c, self.initial_state_h = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) + + # input shape: (batch_size, n_steps, n_input) + with tf.variable_scope("inputs"): + inputs = tf.transpose(self.input_data, [1, 0, 2]) # [n_steps, batch_size, n_input] + inputs = tf.reshape(inputs, [-1, 2*self.chunk_samples]) # (n_steps*batch_size, n_input) + + # Split data because rnn cell needs a list of inputs for the RNN inner loop + inputs = tf.split(axis=0, num_or_size_splits=self.seq_length, value=inputs) # n_steps * (batch_size, n_hidden) + flat_target_data = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) + + self.target = flat_target_data + self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs), [1,0,2]), [self.batch_size*self.seq_length, -1]) + self.input = tf.stack(inputs) + # Get vrnn cell output + outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) + #print outputs + #outputs = map(tf.pack,zip(*outputs)) + outputs_reshape = [] + names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] + for n,name in enumerate(names): + with tf.variable_scope(name): + x = tf.stack([o[n] for o in outputs]) + x = tf.transpose(x,[1,0,2]) + x = tf.reshape(x,[self.batch_size*self.seq_length, -1]) + outputs_reshape.append(x) + + enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma = outputs_reshape + self.final_state_c,self.final_state_h = last_state + self.mu = dec_mu + self.sigma = dec_sigma + + self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) + self.sigma = dec_sigma + self.mu = dec_mu + + print_vars("trainable_variables") + self.train_op = tf.train.AdamOptimizer().minimize(self.cost) + sess = tf.Session() + + def sample(self, num=4410, start=None): + + def sample_gaussian(mu, sigma): + return mu + 
(sigma*np.random.randn(*sigma.shape)) + + if start is None: + prev_x = np.random.randn(1, 1, 2*self.chunk_samples) + elif len(start.shape) == 1: + prev_x = start[np.newaxis,np.newaxis,:] + elif len(start.shape) == 2: + for i in range(start.shape[0]-1): + prev_x = start[i,:] + prev_x = prev_x[np.newaxis,np.newaxis,:] + feed = {self.input_data: prev_x, + self.initial_state_c:prev_state[0], + self.initial_state_h:prev_state[1]} + + [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( + [self.mu, self.sigma, self.rho, + self.final_state_c,self.final_state_h],feed) + + prev_x = start[-1,:] + prev_x = prev_x[np.newaxis,np.newaxis,:] + + prev_state = sess.run(self.cell.zero_state(1, tf.float32)) + chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32) + mus = np.zeros((num, self.chunk_samples), dtype=np.float32) + sigmas = np.zeros((num, self.chunk_samples), dtype=np.float32) + + for i in xrange(num): + feed = {self.input_data: prev_x, + self.initial_state_c:prev_state[0], + self.initial_state_h:prev_state[1]} + [o_mu, o_sigma, o_rho, next_state_c, next_state_h] = sess.run([self.mu, self.sigma, + self.rho, self.final_state_c, self.final_state_h],feed) + + next_x = np.hstack((sample_gaussian(o_mu, o_sigma), + 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) + chunks[i] = next_x + mus[i] = o_mu + sigmas[i] = o_sigma + + prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32) + prev_x[0][0] = next_x + prev_state = next_state_c, next_state_h + + return chunks, mus, sigmas + + def next_batch(self): + t_offset = np.random.randn(self.batch_size, 1, (2 * self.chunk_samples)) + mixed_noise = np.random.randn( + self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + + mixed_noise*0.1 + + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. 
+ t_offset)) + + y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + + mixed_noise*0.1 + + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t0)) + y[:, :, self.chunk_samples:] = 0. + x[:, :, self.chunk_samples:] = 0. + return x, y + + def initialize(self): + self.sess.run(tf.global_variables_initializer()) + + def train(self): + self.n_batches = 100 + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) + saver = tf.train.Saver(tf.global_variables()) + + if ckpt: + saver.restore(self.sess, ckpt.model_checkpoint_path) + print("Loaded model") + + for epoch in range(self.num_epochs): + # Learning rate decay + sess.run(tf.assign(model.lr, self.learning_rate * (self.decay_rate ** epoch))) + + for b in range(self.n_batches): + x, y = next_batch(args) + feed_dict = {model.input_data: x, model.target_data: y} + train_loss, _, cr, sigma= sess.run([model.cost, model.train_op, check, model.sigma], feed_dict = feed_dict) + + if (e * self.n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): + checkpoint_path = os.path.join(dirname, 'model.ckpt') + saver.save(sess, checkpoint_path, global_step=e * self.n_batches + b) + print("model saved to {}".format(checkpoint_path)) + print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(e * self.n_batches + b, args.num_epochs * n_batches, e, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + +if __name__ == '__main__': + model = VRNN() + model.initialize() + model.train() diff --git a/model.py b/model.py index cd803ce..ba53040 100644 --- a/model.py +++ b/model.py @@ -2,7 +2,7 @@ import tensorflow as tf import numpy as np -class VartiationalRNNCell(tf.nn.rnn_cell.RNNCell): +class VRNNCell(tf.nn.rnn_cell.RNNCell): """Variational RNN cell.""" def __init__(self, x_dim, h_dim, z_dim = 100): @@ -25,7 +25,7 @@ def output_size(self): return self.n_h def __call__(self, x, state, scope=None): - ''' + ''' Args: x - input 2D tensor state - 
tuple @@ -52,7 +52,7 @@ def __call__(self, x, state, scope=None): eps = tf.random_normal((get_shape(x)[0], self.n_z), 0.0, 1.0, dtype=tf.float32) # z = mu + sigma*epsilon, latent variable from reparametrization trick z = tf.add(enc_mu, tf.multiply(enc_sigma, eps)) - z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z") + z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z") with tf.variable_scope("Decoder"): dec_hidden = fc_layer(tf.concat(values=(z1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") @@ -61,126 +61,3 @@ def __call__(self, x, state, scope=None): output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state) return (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma), next_state - -class VRNN(): - def __init__(self, args, istest=False): - def tf_normal(y, mu, sigma): - with tf.variable_scope('normal'): - sigma_square = tf.maximum(1e-10, tf.square(sigma)) - norm = tf.subtract(y[:,:args.chunk_samples], mu) - z = tf.div(tf.square(norm), sigma_square) - denom_log = tf.log(2*np.pi*ss, name='denom_log') - result = tf.reduce_sum(z+denom_log, 1)/2# - return result - - def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): - with tf.variable_scope("kl_gaussisan"): - return tf.reduce_sum(0.5 * ( - 2 * tf.log(tf.maximum(1e-9, sigma_2),name='log_sigma_2') - - 2 * tf.log(tf.maximum(1e-9, sigma_1),name='log_sigma_1') - + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1 - ), 1) - - def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y): - kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence - likelihood_loss = tf_normal(y, dec_mu, dec_sigma) - return tf.reduce_mean(kl_loss + likelihood_loss) - - self.args = args - - if istest: - args.batch_size = 1 - args.seq_length = 1 - - cell = VartiationalRNNCell(args.chunk_samples, args.rnn_size, args.latent_size) - - self.cell = cell - - self.input_data = 
tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name='input_data') - self.target_data = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, args.seq_length, 2*args.chunk_samples], name = 'target_data') - self.initial_state_c, self.initial_state_h = self.cell.zero_state(batch_size=args.batch_size, dtype=tf.float32) - - # input shape: (batch_size, n_steps, n_input) - with tf.variable_scope("inputs"): - inputs = tf.transpose(self.input_data, [1, 0, 2]) # [n_steps, batch_size, n_input] - inputs = tf.reshape(inputs, [-1, 2*args.chunk_samples]) # (n_steps*batch_size, n_input) - - # Split data because rnn cell needs a list of inputs for the RNN inner loop - inputs = tf.split(axis=0, num_or_size_splits=args.seq_length, value=inputs) # n_steps * (batch_size, n_hidden) - flat_target_data = tf.reshape(self.target_data, [-1, 2*args.chunk_samples]) - - self.target = flat_target_data - self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs), [1,0,2]), [args.batch_size*args.seq_length, -1]) - self.input = tf.stack(inputs) - # Get vrnn cell output - outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) - #print outputs - #outputs = map(tf.pack,zip(*outputs)) - outputs_reshape = [] - names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] - for n,name in enumerate(names): - with tf.variable_scope(name): - x = tf.stack([o[n] for o in outputs]) - x = tf.transpose(x,[1,0,2]) - x = tf.reshape(x,[args.batch_size*args.seq_length, -1]) - outputs_reshape.append(x) - - enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma = outputs_reshape - self.final_state_c,self.final_state_h = last_state - self.mu = dec_mu - self.sigma = dec_sigma - - self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) - self.sigma = dec_sigma - self.mu = dec_mu - - print_vars("trainable_variables") - 
self.train_op = tf.train.AdamOptimizer().minimize(self.cost) - - def sample(self, sess, args, num=4410, start=None): - - def sample_gaussian(mu, sigma): - return mu + (sigma*np.random.randn(*sigma.shape)) - - if start is None: - prev_x = np.random.randn(1, 1, 2*args.chunk_samples) - elif len(start.shape) == 1: - prev_x = start[np.newaxis,np.newaxis,:] - elif len(start.shape) == 2: - for i in range(start.shape[0]-1): - prev_x = start[i,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - - [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( - [self.mu, self.sigma, self.rho, - self.final_state_c,self.final_state_h],feed) - - prev_x = start[-1,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - - prev_state = sess.run(self.cell.zero_state(1, tf.float32)) - chunks = np.zeros((num, 2*args.chunk_samples), dtype=np.float32) - mus = np.zeros((num, args.chunk_samples), dtype=np.float32) - sigmas = np.zeros((num, args.chunk_samples), dtype=np.float32) - - for i in xrange(num): - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, o_rho, next_state_c, next_state_h] = sess.run([self.mu, self.sigma, - self.rho, self.final_state_c, self.final_state_h],feed) - - next_x = np.hstack((sample_gaussian(o_mu, o_sigma), - 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) - chunks[i] = next_x - mus[i] = o_mu - sigmas[i] = o_sigma - - prev_x = np.zeros((1, 1, 2*args.chunk_samples), dtype=np.float32) - prev_x[0][0] = next_x - prev_state = next_state_c, next_state_h - - return chunks, mus, sigmas diff --git a/sample_vrnn.py b/test.py similarity index 100% rename from sample_vrnn.py rename to test.py diff --git a/train.py b/train.py deleted file mode 100644 index f32bbf4..0000000 --- a/train.py +++ /dev/null @@ -1,91 +0,0 @@ -from config import SAVE_DIR -from utils import create_dir, pickle_save -from 
model_vrnn import VRNN -import matplotlib.pyplot as plt - -import numpy as np -import tensorflow as tf -from datetime import datetime -import os -import pickle - -''' -TODOS: - - parameters for depth and width of hidden layers - - implement predict function - - separate binary and gaussian variables - - clean up nomenclature to remove MDCT references - - implement separate MDCT training and sampling version -''' - -def next_batch(args): - t_offset = np.random.randn(args.batch_size, 1, (2 * args.chunk_samples)) - mixed_noise = np.random.randn( - args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - x = np.random.randn(args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - + mixed_noise*0.1 - + np.sin(2 * np.pi * (np.arange(args.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) - - y = np.random.randn(args.batch_size, args.seq_length, (2 * args.chunk_samples)) * 0.1 - + mixed_noise*0.1 - + np.sin(2 * np.pi * (np.arange(1, args.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t0)) - y[:, :, args.chunk_samples:] = 0. - x[:, :, args.chunk_samples:] = 0. 
- return x, y - -def train(args, model): - create_dir(SAVE_DIR) - pickle_path = os.path.join(SAVE_DIR, 'config.pkl') - pickle_save(args, pickle_path) - - ckpt = tf.train.get_checkpoint_state(SAVE_DIR) - n_batches = 100 - with tf.Session() as sess: - merged = tf.summary.merge_all() - sess.run(tf.global_variables_initializer()) - saver = tf.train.Saver(tf.global_variables()) - if ckpt: - saver.restore(sess, ckpt.model_checkpoint_path) - print("Loaded model") - - for epoch in range(args.num_epochs): - # Learning rate decay - sess.run(tf.assign(model.lr, args.learning_rate * (args.decay_rate ** epoch))) - - for b in range(n_batches): - x, y = next_batch(args) - feed_dict = {model.input_data: x, model.target_data: y} - train_loss, _, cr, sigma= sess.run([model.cost, model.train_op, check, model.sigma], feed_dict = feed_dict) - - if (e * n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): - checkpoint_path = os.path.join(dirname, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=e * n_batches + b) - print("model saved to {}".format(checkpoint_path)) - print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(e * n_batches + b, args.num_epochs * n_batches, e, args.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument('--rnn_size', type=int, default=3, - help='size of RNN hidden state') - parser.add_argument('--latent_size', type=int, default=3, - help='size of latent space') - parser.add_argument('--batch_size', type=int, default=3000, - help='minibatch size') - parser.add_argument('--seq_length', type=int, default=100, - help='RNN sequence length') - parser.add_argument('--num_epochs', type=int, default=100, - help='number of epochs') - parser.add_argument('--save_every', type=int, default=500, - help='save frequency') - parser.add_argument('--grad_clip', type=float, default=10., - help='clip gradients at this value') - 
parser.add_argument('--learning_rate', type=float, default=0.0005, - help='learning rate') - parser.add_argument('--decay_rate', type=float, default=1., - help='decay of learning rate') - parser.add_argument('--chunk_samples', type=int, default=1, - help='number of samples per mdct chunk') - args = parser.parse_args() - model = VRNN(args) - train(args, model) From 84145851f074a7fd1ad329c743d6b116d3334c56 Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Wed, 26 Jul 2017 15:02:37 +0900 Subject: [PATCH 05/10] Remove test.py --- main.py | 9 +++++++++ test.py | 19 ------------------- 2 files changed, 9 insertions(+), 19 deletions(-) delete mode 100644 test.py diff --git a/main.py b/main.py index 95291e7..84b9f59 100644 --- a/main.py +++ b/main.py @@ -156,6 +156,12 @@ def next_batch(self): def initialize(self): self.sess.run(tf.global_variables_initializer()) + def restore(self): + saver = tf.train.Saver(tf.global_variables()) + ckpt = tf.train.get_checkpoint_state(SAVE_DIR) + print("loading model: ", ckpt.model_checkpoint_path) + saver.restore(self.sess, ckpt.model_checkpoint_path) + def train(self): self.n_batches = 100 ckpt = tf.train.get_checkpoint_state(SAVE_DIR) @@ -184,3 +190,6 @@ def train(self): model = VRNN() model.initialize() model.train() + #model = VRNN(True) + #model.restore() + #sample_data,mus,sigmas = model.sample() diff --git a/test.py b/test.py deleted file mode 100644 index 660fcee..0000000 --- a/test.py +++ /dev/null @@ -1,19 +0,0 @@ -from train import next_batch -from utils import pickle_load -from model import VRNN -from config import SAVE_DIR -import tensorflow as tf -import numpy as np -import pickle -import os - -load_path = os.path.join(SAVE_DIR, 'config.pkl') -loaded_args = pickle_load(load_path) - -model = VRNN(loaded_args, True) -sess = tf.Session() -saver = tf.train.Saver(tf.global_variables()) -ckpt = tf.train.get_checkpoint_state(SAVE_DIR) -print("loading model: ", ckpt.model_checkpoint_path) -saver.restore(sess, 
ckpt.model_checkpoint_path) -sample_data,mus,sigmas = model.sample(sess, loaded_args) \ No newline at end of file From 0bacfdbdd7c8246f6efbb8ad0bc21ae9293a101d Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Wed, 26 Jul 2017 16:56:06 +0900 Subject: [PATCH 06/10] Remove redundant things --- config.py | 2 +- main.py | 170 ++++++++++++++++++++++++++++++------------------------ model.py | 2 +- 3 files changed, 97 insertions(+), 77 deletions(-) diff --git a/config.py b/config.py index 5e32ee7..76ffce3 100644 --- a/config.py +++ b/config.py @@ -10,7 +10,7 @@ def __init__(self): self.latent_size = 3 # size of latent space self.seq_length = 100 # RNN sequence length - self.chunk_samples = 1 # number of sample os per mdct chunk + self.chunk_samples = 1 # number of samples per mdct chunk self.num_epochs = 100 self.batch_size = 3000 diff --git a/main.py b/main.py index 84b9f59..9e3092e 100644 --- a/main.py +++ b/main.py @@ -18,26 +18,31 @@ class VRNN(VRNNConfig): def __init__(self, istest=False): VRNNConfig.__init__(self) - def tf_normal(y, mu, sigma): - with tf.variable_scope('normal'): - sigma_square = tf.maximum(1e-10, tf.square(sigma)) - norm = tf.subtract(y[:,:args.chunk_samples], mu) - z = tf.div(tf.square(norm), sigma_square) - denom_log = tf.log(2*np.pi*ss, name='denom_log') - result = tf.reduce_sum(z+denom_log, 1)/2# - return result + def NLL(y, mu, sigma): + '''Negative LogLiklihood + - log(1/sqrt(2*pi)e-(y-mu)^2/2/sigma^2) + = + 1/2*(log(2*pi)+ (y-mu)^2/2/sigma^2) + ''' + with tf.variable_scope('NLL'): + sigma_square = tf.maximum(1e-10, tf.square(sigma)) # sigma^2, avoid to be zero + norm = tf.subtract(y[:,:args.chunk_samples], mu) # x-\mu + z = tf.div(tf.square(norm), sigma_square) # (x-\mu)^2/sigma^2 + denom_log = tf.log(2*np.pi*ss) + return 0.5*tf.reduce_sum(z+denom_log, 1) def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): + ''' + Kullback leibler divergence for two gaussian distributions + ''' with tf.variable_scope("kl_gaussisan"): return tf.reduce_sum(0.5 * ( 
2 * tf.log(tf.maximum(1e-9, sigma_2),name='log_sigma_2') - 2 * tf.log(tf.maximum(1e-9, sigma_1),name='log_sigma_1') - + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1 - ), 1) + + (tf.square(sigma_1) + tf.square(mu_1 - mu_2)) / tf.maximum(1e-9,(tf.square(sigma_2))) - 1), 1) def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y): - kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence - likelihood_loss = tf_normal(y, dec_mu, dec_sigma) + kl_loss = kl_gaussian(enc_mu, enc_sigma, prior_mu, prior_sigma) # KL_divergence loss + likelihood_loss = NLL(y, dec_mu, dec_sigma) # Negative log liklihood loss return tf.reduce_mean(kl_loss + likelihood_loss) if istest: @@ -48,26 +53,23 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) self.cell = cell + # [batch_size, seq_length, chunk_samples*2] self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data') + # [batch_size, seq_length, chunk_samples*2] self.target_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name = 'target_data') + # [batch_size, rnn_size] self.initial_state_c, self.initial_state_h = self.cell.zero_state(batch_size=self.batch_size, dtype=tf.float32) - # input shape: (batch_size, n_steps, n_input) with tf.variable_scope("inputs"): - inputs = tf.transpose(self.input_data, [1, 0, 2]) # [n_steps, batch_size, n_input] - inputs = tf.reshape(inputs, [-1, 2*self.chunk_samples]) # (n_steps*batch_size, n_input) + inputs = tf.transpose(self.input_data, [1, 0, 2]) # [seq_length, batch_size, 2*chunk_samples] + inputs = tf.reshape(inputs, [-1, 2*self.chunk_samples]) # [seq_length*batch_size, 2*chunk_samples] + inputs = tf.split(axis=0, num_or_size_splits=self.seq_length, value=inputs) # seq_length * [batch_size, 2*chunk_samples] - # Split data because rnn cell needs a list 
of inputs for the RNN inner loop - inputs = tf.split(axis=0, num_or_size_splits=self.seq_length, value=inputs) # n_steps * (batch_size, n_hidden) - flat_target_data = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) + # [batch_size* seq_length, chunk_samples*2] + self.target = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) - self.target = flat_target_data - self.flat_input = tf.reshape(tf.transpose(tf.stack(inputs), [1,0,2]), [self.batch_size*self.seq_length, -1]) - self.input = tf.stack(inputs) - # Get vrnn cell output outputs, last_state = tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) - #print outputs - #outputs = map(tf.pack,zip(*outputs)) + outputs_reshape = [] names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] for n,name in enumerate(names): @@ -90,67 +92,36 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) self.train_op = tf.train.AdamOptimizer().minimize(self.cost) sess = tf.Session() - def sample(self, num=4410, start=None): - - def sample_gaussian(mu, sigma): - return mu + (sigma*np.random.randn(*sigma.shape)) - - if start is None: - prev_x = np.random.randn(1, 1, 2*self.chunk_samples) - elif len(start.shape) == 1: - prev_x = start[np.newaxis,np.newaxis,:] - elif len(start.shape) == 2: - for i in range(start.shape[0]-1): - prev_x = start[i,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - - [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( - [self.mu, self.sigma, self.rho, - self.final_state_c,self.final_state_h],feed) - - prev_x = start[-1,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - - prev_state = sess.run(self.cell.zero_state(1, tf.float32)) - chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32) - mus = np.zeros((num, self.chunk_samples), dtype=np.float32) - sigmas = np.zeros((num, 
self.chunk_samples), dtype=np.float32) - - for i in xrange(num): - feed = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, o_rho, next_state_c, next_state_h] = sess.run([self.mu, self.sigma, - self.rho, self.final_state_c, self.final_state_h],feed) - - next_x = np.hstack((sample_gaussian(o_mu, o_sigma), - 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) - chunks[i] = next_x - mus[i] = o_mu - sigmas[i] = o_sigma + def next_batch(self): + ''' + 3D signal + [batch_axis, time_axis, chunk_axis] + = common noise + noise + sin(time_axis[:] + time_offset) - prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32) - prev_x[0][0] = next_x - prev_state = next_state_c, next_state_h + half of the chunk_axis are all zeros - return chunks, mus, sigmas + Return: + x, y + x - 3D ndarray + [self.batch_size, self.seq_length, 2*self.chunk_samples] + y - 3D ndarray + [self.batch_size, self.seq_length, 2*self.chunk_samples] - def next_batch(self): + ''' t_offset = np.random.randn(self.batch_size, 1, (2 * self.chunk_samples)) - mixed_noise = np.random.randn( - self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.01 + x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 - + mixed_noise*0.1 + + mixed_noise + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 - + mixed_noise*0.1 - + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t0)) + + mixed_noise + + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + y[:, :, self.chunk_samples:] = 0. x[:, :, self.chunk_samples:] = 0. 
+ return x, y def initialize(self): @@ -186,6 +157,55 @@ def train(self): print("model saved to {}".format(checkpoint_path)) print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(e * self.n_batches + b, args.num_epochs * n_batches, e, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + def sample(self, num=4410, start=None): + def sample_gaussian(mu, sigma): + return mu + (sigma*np.random.randn(*sigma.shape)) + + if start is None: + prev_x = np.random.randn(1, 1, 2*self.chunk_samples) + elif len(start.shape) == 1: + prev_x = start[np.newaxis,np.newaxis,:] + elif len(start.shape) == 2: + for i in range(start.shape[0]-1): + prev_x = start[i,:] + prev_x = prev_x[np.newaxis,np.newaxis,:] + feed_dict = {self.input_data: prev_x, + self.initial_state_c:prev_state[0], + self.initial_state_h:prev_state[1]} + + [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( + [self.mu, self.sigma, self.rho,self.final_state_c,self.final_state_h], + feed_dict=feed_dict) + + prev_x = start[-1,:] + prev_x = prev_x[np.newaxis,np.newaxis,:] + + prev_state = sess.run(self.cell.zero_state(1, tf.float32)) + chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32) + mus = np.zeros((num, self.chunk_samples), dtype=np.float32) + sigmas = np.zeros((num, self.chunk_samples), dtype=np.float32) + + for i in range(num): + feed_dict = {self.input_data: prev_x, + self.initial_state_c:prev_state[0], + self.initial_state_h:prev_state[1]} + [o_mu, o_sigma, next_state_c, next_state_h] = sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict = feed_dict) + + next_x = np.hstack((sample_gaussian(o_mu, o_sigma), + 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) + chunks[i] = next_x + mus[i] = o_mu + sigmas[i] = o_sigma + + prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32) + prev_x[0][0] = next_x + prev_state = next_state_c, next_state_h + + return chunks, mus, sigmas + + if __name__ == '__main__': model = VRNN() 
model.initialize() diff --git a/model.py b/model.py index ba53040..013f5a4 100644 --- a/model.py +++ b/model.py @@ -35,7 +35,6 @@ def __call__(self, x, state, scope=None): ''' with tf.variable_scope(scope or type(self).__name__): h, c = state - with tf.variable_scope("Prior"): prior_hidden = fc_layer(h, self.n_prior_hidden, activation = tf.nn.relu, scope = "hidden") prior_mu = fc_layer(prior_hidden, self.n_z, scope = "mu") @@ -60,4 +59,5 @@ def __call__(self, x, state, scope=None): dec_sigma = fc_layer(dec_hidden, self.n_x, scope = "sigma") output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state) + return (enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma), next_state From 70ef7d8d6e7cbd6d999c54721c3725fbf447aada Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Wed, 26 Jul 2017 17:29:49 +0900 Subject: [PATCH 07/10] Runnable code --- config.py | 2 +- main.py | 62 +++++++++++++++++++++++++++++-------------------------- model.py | 2 +- ops.py | 10 ++++----- 4 files changed, 40 insertions(+), 36 deletions(-) diff --git a/config.py b/config.py index 76ffce3..b3e28d7 100644 --- a/config.py +++ b/config.py @@ -19,4 +19,4 @@ def __init__(self): self.grad_clip = 10 # clip gradients at this value self.decay_rate = 1. 
- self.learning_rate = 0.0005 + self.lr = 0.0005 # initial learning_rate diff --git a/main.py b/main.py index 9e3092e..d7292f3 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ from utils import create_dir, pickle_save from config import SAVE_DIR, VRNNConfig from datetime import datetime +from ops import print_vars from model import VRNNCell import matplotlib.pyplot as plt @@ -17,7 +18,7 @@ class VRNN(VRNNConfig): def __init__(self, istest=False): VRNNConfig.__init__(self) - + logger.info("Building model starts...") def NLL(y, mu, sigma): '''Negative LogLiklihood - log(1/sqrt(2*pi)e-(y-mu)^2/2/sigma^2) @@ -25,9 +26,9 @@ def NLL(y, mu, sigma): ''' with tf.variable_scope('NLL'): sigma_square = tf.maximum(1e-10, tf.square(sigma)) # sigma^2, avoid to be zero - norm = tf.subtract(y[:,:args.chunk_samples], mu) # x-\mu + norm = tf.subtract(y[:,:self.chunk_samples], mu) # x-\mu z = tf.div(tf.square(norm), sigma_square) # (x-\mu)^2/sigma^2 - denom_log = tf.log(2*np.pi*ss) + denom_log = tf.log(2*np.pi*sigma_square) return 0.5*tf.reduce_sum(z+denom_log, 1) def kl_gaussian(mu_1, sigma_1, mu_2, sigma_2): @@ -48,11 +49,10 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) if istest: self.batch_size = 1 self.seq_length = 1 - - cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size) - - self.cell = cell - + logger.info("Building VRNNCell starts...") + self.cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size) + logger.info("Building VRNNCell done.") + # [batch_size, seq_length, chunk_samples*2] self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data') # [batch_size, seq_length, chunk_samples*2] @@ -68,7 +68,7 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) # [batch_size* seq_length, chunk_samples*2] self.target = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) - outputs, last_state = 
tf.contrib.rnn.static_rnn(cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) + outputs, last_state = tf.contrib.rnn.static_rnn(self.cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) outputs_reshape = [] names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] @@ -84,13 +84,17 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) self.mu = dec_mu self.sigma = dec_sigma - self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, dec_sigma, prior_mu, prior_sigma, flat_target_data) + self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, self.target) self.sigma = dec_sigma self.mu = dec_mu print_vars("trainable_variables") - self.train_op = tf.train.AdamOptimizer().minimize(self.cost) - sess = tf.Session() + self.lr = tf.Variable(self.lr, trainable = False) + self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cost) + + logger.info("Building model done.") + + self.sess = tf.Session() def next_batch(self): ''' @@ -111,20 +115,16 @@ def next_batch(self): t_offset = np.random.randn(self.batch_size, 1, (2 * self.chunk_samples)) mixed_noise = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.01 - x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 - + mixed_noise - + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) + x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. + t_offset)) - y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 - + mixed_noise - + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. 
+ t_offset)) + y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t_offset)) y[:, :, self.chunk_samples:] = 0. x[:, :, self.chunk_samples:] = 0. - return x, y def initialize(self): + logger.info("Initialization of parameters") self.sess.run(tf.global_variables_initializer()) def restore(self): @@ -142,20 +142,24 @@ def train(self): saver.restore(self.sess, ckpt.model_checkpoint_path) print("Loaded model") + iteration = 0 for epoch in range(self.num_epochs): # Learning rate decay - sess.run(tf.assign(model.lr, self.learning_rate * (self.decay_rate ** epoch))) + self.sess.run(tf.assign(self.lr, self.lr * (self.decay_rate ** epoch))) - for b in range(self.n_batches): - x, y = next_batch(args) + for batch in range(self.n_batches): + x, y = self.next_batch() feed_dict = {model.input_data: x, model.target_data: y} - train_loss, _, cr, sigma= sess.run([model.cost, model.train_op, check, model.sigma], feed_dict = feed_dict) + train_loss, _, sigma= self.sess.run([self.cost, self.train_op, self.sigma], feed_dict = feed_dict) - if (e * self.n_batches + b) % args.save_every == 0 and ((e * n_batches + b) > 0): + iteration+=1 + print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(iteration, self.num_epochs * self.n_batches, epoch, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + + if iteration % self.save_every == 0 and iteration > 0: checkpoint_path = os.path.join(dirname, 'model.ckpt') - saver.save(sess, checkpoint_path, global_step=e * self.n_batches + b) - print("model saved to {}".format(checkpoint_path)) - print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(e * self.n_batches + b, args.num_epochs * n_batches, e, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + saver.save(self.sess, checkpoint_path, global_step=iteration) + logger.info("model saved to 
{}".format(checkpoint_path)) + def sample(self, num=4410, start=None): def sample_gaussian(mu, sigma): @@ -173,7 +177,7 @@ def sample_gaussian(mu, sigma): self.initial_state_c:prev_state[0], self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = sess.run( + [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = self.sess.run( [self.mu, self.sigma, self.rho,self.final_state_c,self.final_state_h], feed_dict=feed_dict) @@ -189,7 +193,7 @@ def sample_gaussian(mu, sigma): feed_dict = {self.input_data: prev_x, self.initial_state_c:prev_state[0], self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, next_state_c, next_state_h] = sess.run( + [o_mu, o_sigma, next_state_c, next_state_h] = self.sess.run( [self.mu, self.sigma, self.final_state_c, self.final_state_h], feed_dict = feed_dict) diff --git a/model.py b/model.py index 013f5a4..9bef2bd 100644 --- a/model.py +++ b/model.py @@ -54,7 +54,7 @@ def __call__(self, x, state, scope=None): z_1 = fc_layer(z, self.n_z_1, activation = tf.nn.relu, scope = "phi_z") with tf.variable_scope("Decoder"): - dec_hidden = fc_layer(tf.concat(values=(z1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") + dec_hidden = fc_layer(tf.concat(values=(z_1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") dec_mu = fc_layer(dec_hidden, self.n_x, scope = "mu") dec_sigma = fc_layer(dec_hidden, self.n_x, scope = "sigma") diff --git a/ops.py b/ops.py index e4b672f..87fcad4 100644 --- a/ops.py +++ b/ops.py @@ -1,14 +1,14 @@ import tensorflow as tf -def print_vars(string): - '''print variables in collection named string''' - print("Collection name %s"%string) - print([v.name for v in tf.get_collection(string)]) - def get_shape(tensor): '''return the shape of tensor as list''' return tensor.get_shape().as_list() +def print_vars(string): + '''print variables in collection named string''' + print("Collection name %s"%string) + print(" "+"\n ".join(["{} : {}".format(v.name, 
get_shape(v)) for v in tf.get_collection(string)])) + def fc_layer(input_, output_size, activation = None, batch_norm = False, istrain = False, scope = None): ''' fully convlolution layer From d2ef3e68e4c60a889d3b4b3704bb2107d56c2abe Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Wed, 26 Jul 2017 19:59:27 +0900 Subject: [PATCH 08/10] Complete the test code --- model.py => cell.py | 12 +++-- config.py | 4 +- main.py | 120 +++++++++++++++++++++++++++----------------- utils.py | 2 +- 4 files changed, 87 insertions(+), 51 deletions(-) rename model.py => cell.py (87%) diff --git a/model.py b/cell.py similarity index 87% rename from model.py rename to cell.py index 9bef2bd..f571847 100644 --- a/model.py +++ b/cell.py @@ -6,6 +6,12 @@ class VRNNCell(tf.nn.rnn_cell.RNNCell): """Variational RNN cell.""" def __init__(self, x_dim, h_dim, z_dim = 100): + ''' + Args: + x_dim - chunk_samples + h_dim - rnn_size + z_dim - latent_size + ''' self.n_h = h_dim self.n_x = x_dim self.n_z = z_dim @@ -27,7 +33,7 @@ def output_size(self): def __call__(self, x, state, scope=None): ''' Args: - x - input 2D tensor + x - input 2D tensor [batch_size x 2*self.chunk_samples] state - tuple (hidden, cell_state) scope - string @@ -45,7 +51,7 @@ def __call__(self, x, state, scope=None): with tf.variable_scope("Encoder"): enc_hidden = fc_layer(tf.concat(values=(x_1, h), axis=1), self.n_enc_hidden, activation = tf.nn.relu, scope = "hidden") enc_mu = fc_layer(enc_hidden, self.n_z, scope = 'mu') - enc_sigma = fc_layer(enc_hidden, self.n_z, activation = tf.nn.softplus, scope = 'sigma') + enc_sigma = fc_layer(enc_hidden, self.n_z, activation = tf.nn.softplus, scope = 'sigma') # Random sampling ~ N(0, 1) eps = tf.random_normal((get_shape(x)[0], self.n_z), 0.0, 1.0, dtype=tf.float32) @@ -56,7 +62,7 @@ def __call__(self, x, state, scope=None): with tf.variable_scope("Decoder"): dec_hidden = fc_layer(tf.concat(values=(z_1, h), axis=1), self.n_dec_hidden, activation = tf.nn.relu, scope = "hidden") dec_mu = 
fc_layer(dec_hidden, self.n_x, scope = "mu") - dec_sigma = fc_layer(dec_hidden, self.n_x, scope = "sigma") + dec_sigma = fc_layer(dec_hidden, self.n_x, activation = tf.nn.softplus, scope = "sigma") output, next_state = self.lstm(tf.concat(values=(x_1, z_1), axis=1), state) diff --git a/config.py b/config.py index b3e28d7..4ba7d78 100644 --- a/config.py +++ b/config.py @@ -12,10 +12,10 @@ def __init__(self): self.seq_length = 100 # RNN sequence length self.chunk_samples = 1 # number of samples per mdct chunk - self.num_epochs = 100 + self.num_epochs = 1 self.batch_size = 3000 self.n_batches = 100 - self.save_every = 500 + self.log_every = 20 self.grad_clip = 10 # clip gradients at this value self.decay_rate = 1. diff --git a/main.py b/main.py index d7292f3..5216979 100644 --- a/main.py +++ b/main.py @@ -2,9 +2,8 @@ from config import SAVE_DIR, VRNNConfig from datetime import datetime from ops import print_vars -from model import VRNNCell +from cell import VRNNCell -import matplotlib.pyplot as plt import tensorflow as tf import numpy as np import logging @@ -23,6 +22,12 @@ def NLL(y, mu, sigma): '''Negative LogLiklihood - log(1/sqrt(2*pi)e-(y-mu)^2/2/sigma^2) = + 1/2*(log(2*pi)+ (y-mu)^2/2/sigma^2) + Args : + y - [batch_size x seq_length, 2*chunk_samples] + mu - [batch_size x seq_length, chunk_samples] + sigma - [batch_size x seq_length, chunk_samples] + return + NLL ''' with tf.variable_scope('NLL'): sigma_square = tf.maximum(1e-10, tf.square(sigma)) # sigma^2, avoid to be zero @@ -52,7 +57,7 @@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) logger.info("Building VRNNCell starts...") self.cell = VRNNCell(self.chunk_samples, self.rnn_size, self.latent_size) logger.info("Building VRNNCell done.") - + # [batch_size, seq_length, chunk_samples*2] self.input_data = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, self.seq_length, 2*self.chunk_samples], name='input_data') # [batch_size, seq_length, chunk_samples*2] @@ -69,29 +74,27 
@@ def get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, y) self.target = tf.reshape(self.target_data, [-1, 2*self.chunk_samples]) outputs, last_state = tf.contrib.rnn.static_rnn(self.cell, inputs, initial_state=(self.initial_state_c, self.initial_state_h)) - + # outputs seq_length*tuple*[batch_size, chunk_samples] outputs_reshape = [] names = ["enc_mu", "enc_sigma", "dec_mu", "dec_sigma", "prior_mu", "prior_sigma"] - for n,name in enumerate(names): + + for n, name in enumerate(names): with tf.variable_scope(name): - x = tf.stack([o[n] for o in outputs]) - x = tf.transpose(x,[1,0,2]) - x = tf.reshape(x,[self.batch_size*self.seq_length, -1]) + x = tf.stack([o[n] for o in outputs]) # [seq_length, batch_size, chunk_samples] + x = tf.transpose(x,[1,0,2]) # [batch_size, seq_length, chunk_samples] + x = tf.reshape(x, [self.batch_size*self.seq_length, -1]) # [batch_size x seq_length, chunk_samples] outputs_reshape.append(x) - + # tuple*[batch_size x seq_length, chunk_samples] enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma = outputs_reshape - self.final_state_c,self.final_state_h = last_state self.mu = dec_mu self.sigma = dec_sigma + self.final_state_c, self.final_state_h = last_state self.cost = get_lossfunc(enc_mu, enc_sigma, dec_mu, dec_sigma, prior_mu, prior_sigma, self.target) - self.sigma = dec_sigma - self.mu = dec_mu print_vars("trainable_variables") self.lr = tf.Variable(self.lr, trainable = False) self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.cost) - logger.info("Building model done.") self.sess = tf.Session() @@ -116,7 +119,6 @@ def next_batch(self): mixed_noise = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.01 x = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(self.seq_length)[np.newaxis, :, np.newaxis] / 10. 
+ t_offset)) - y = np.random.randn(self.batch_size, self.seq_length, (2 * self.chunk_samples)) * 0.1 + mixed_noise + np.sin(2 * np.pi * (np.arange(1, self.seq_length+1)[np.newaxis, :, np.newaxis] / 10. + t_offset)) y[:, :, self.chunk_samples:] = 0. @@ -130,17 +132,17 @@ def initialize(self): def restore(self): saver = tf.train.Saver(tf.global_variables()) ckpt = tf.train.get_checkpoint_state(SAVE_DIR) - print("loading model: ", ckpt.model_checkpoint_path) + print("Load the model from {}".format(ckpt.model_checkpoint_path)) saver.restore(self.sess, ckpt.model_checkpoint_path) def train(self): - self.n_batches = 100 + create_dir(SAVE_DIR) ckpt = tf.train.get_checkpoint_state(SAVE_DIR) saver = tf.train.Saver(tf.global_variables()) if ckpt: saver.restore(self.sess, ckpt.model_checkpoint_path) - print("Loaded model") + print("Load the model from %s"%ckpt.model_checkpoint_path) iteration = 0 for epoch in range(self.num_epochs): @@ -153,67 +155,95 @@ def train(self): train_loss, _, sigma= self.sess.run([self.cost, self.train_op, self.sigma], feed_dict = feed_dict) iteration+=1 - print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(iteration, self.num_epochs * self.n_batches, epoch, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) - - if iteration % self.save_every == 0 and iteration > 0: - checkpoint_path = os.path.join(dirname, 'model.ckpt') + if iteration % self.log_every == 0 and iteration > 0: + print("{}/{}(epoch {}), train_loss = {:.6f}, std = {:.3f}".format(iteration, self.num_epochs * self.n_batches, epoch+1, self.chunk_samples * train_loss, sigma.mean(axis=0).mean(axis=0))) + checkpoint_path = os.path.join(SAVE_DIR, 'model.ckpt') saver.save(self.sess, checkpoint_path, global_step=iteration) logger.info("model saved to {}".format(checkpoint_path)) def sample(self, num=4410, start=None): + ''' + Args : + num - int + 4410 + start - sequence + None => generate [1, 1, 2*self.chunk_samples] + start.shape==1 => generate [1, 1, 
2*self.chunk_samples] + start.shape==2 [seq, 2*self.chunk_samples] + => generate( + Return : + chunks - + mus - + sigmas - + ''' def sample_gaussian(mu, sigma): return mu + (sigma*np.random.randn(*sigma.shape)) + # Initial condition + prev_state = self.sess.run(self.cell.zero_state(1, tf.float32)) # [batch_size, rnn_size] + if start is None: prev_x = np.random.randn(1, 1, 2*self.chunk_samples) elif len(start.shape) == 1: prev_x = start[np.newaxis,np.newaxis,:] elif len(start.shape) == 2: for i in range(start.shape[0]-1): - prev_x = start[i,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] - feed_dict = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} + prev_x = start[i,:] # [2*self.chunk_samples] + prev_x = prev_x[np.newaxis,np.newaxis,:] #[1, 1, 2*self.chunk_samples] + + feed_dict = { + self.input_data : prev_x, + self.initial_state_c : prev_state[0], + self.initial_state_h : prev_state[1] + } - [o_mu, o_sigma, o_rho, prev_state_c, prev_state_h] = self.sess.run( - [self.mu, self.sigma, self.rho,self.final_state_c,self.final_state_h], - feed_dict=feed_dict) + [prev_state_c, prev_state_h] = self.sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict=feed_dict + ) + prev_state = prev_state_c, prev_state_h - prev_x = start[-1,:] - prev_x = prev_x[np.newaxis,np.newaxis,:] + prev_x = start[-1,:] # [2*self.chunk_samples] + prev_x = prev_x[np.newaxis,np.newaxis,:] # [1,1,2*self.chunk_samples] - prev_state = sess.run(self.cell.zero_state(1, tf.float32)) chunks = np.zeros((num, 2*self.chunk_samples), dtype=np.float32) mus = np.zeros((num, self.chunk_samples), dtype=np.float32) sigmas = np.zeros((num, self.chunk_samples), dtype=np.float32) for i in range(num): - feed_dict = {self.input_data: prev_x, - self.initial_state_c:prev_state[0], - self.initial_state_h:prev_state[1]} - [o_mu, o_sigma, next_state_c, next_state_h] = self.sess.run( - [self.mu, self.sigma, self.final_state_c, 
self.final_state_h], - feed_dict = feed_dict) + feed_dict = { + self.input_data : prev_x, + self.initial_state_c : prev_state[0], + self.initial_state_h : prev_state[1] + } - next_x = np.hstack((sample_gaussian(o_mu, o_sigma), - 2.*(o_rho > np.random.random(o_rho.shape[:2]))-1.)) + [o_mu, o_sigma, next_state_c, next_state_h] = self.sess.run( + [self.mu, self.sigma, self.final_state_c, self.final_state_h], + feed_dict = feed_dict + ) + next_x = np.hstack( + ( + sample_gaussian(o_mu, o_sigma), np.zeros((1, self.chunk_samples)) + ) + ) # [1, 2*self.chunk_samples] chunks[i] = next_x mus[i] = o_mu sigmas[i] = o_sigma prev_x = np.zeros((1, 1, 2*self.chunk_samples), dtype=np.float32) - prev_x[0][0] = next_x + prev_x[0] = next_x prev_state = next_state_c, next_state_h return chunks, mus, sigmas - if __name__ == '__main__': model = VRNN() model.initialize() model.train() - #model = VRNN(True) - #model.restore() - #sample_data,mus,sigmas = model.sample() + ''' + Test code + model2 = VRNN(True) + model2.restore() + print(model2.sample()) + ''' diff --git a/utils.py b/utils.py index a7fe53e..64e46af 100644 --- a/utils.py +++ b/utils.py @@ -14,4 +14,4 @@ def pickle_load(path): def pickle_save(content, path): '''Save the content on the path''' with open(path, 'wb') as f: - pickle.dump(content, f) \ No newline at end of file + pickle.dump(content, f) From 0685cad1c3b3ab2a2f90b570fbce410449e8227b Mon Sep 17 00:00:00 2001 From: maestrojeong Date: Wed, 26 Jul 2017 20:02:29 +0900 Subject: [PATCH 09/10] Edit REAME.md --- README.md | 21 +++++++++++++-------- config.py | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 48db928..cb4cd47 100644 --- a/README.md +++ b/README.md @@ -5,21 +5,26 @@ A variational recurrent neural network as described in: ## Requirements python == 3.5 -tensorflow == 1.2.1L +tensorflow == 1.2.1 +numpy==1.13.1 -## train.py +## main.py * train this model - -## model.py -* **VRNN** structure +```python +python main.py 
+```
+## cell.py
+* **VRNNCell** structure
 
 ## utils.py
 * Basic functions implementation
 
-## config.py
-* Basic configuration of model
-
+## ops.py
+* Basic operations based on tensorflow
 
+## config.py
+* Basic configuration of model
+* Every configuration can be changed here.
 ![VRNN Structure](graph1.png?raw=true "VRNN Structure")
 ![Global Structure](graph2.png?raw=true "Global Structure")
diff --git a/config.py b/config.py
index 4ba7d78..f2a4d8e 100644
--- a/config.py
+++ b/config.py
@@ -12,7 +12,7 @@ def __init__(self):
         self.seq_length = 100 # RNN sequence length
         self.chunk_samples = 1 # number of samples per mdct chunk
 
-        self.num_epochs = 1
+        self.num_epochs = 5
         self.batch_size = 3000
         self.n_batches = 100
         self.log_every = 20

From e6410b1924662a9a48418e8bfe2de46c9e496dfd Mon Sep 17 00:00:00 2001
From: maestrojeong
Date: Wed, 26 Jul 2017 20:03:02 +0900
Subject: [PATCH 10/10] Edit README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cb4cd47..4bd3741 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,6 @@ python main.py
 ## config.py
 * Basic configuration of model
 * Every configuration can be changed here.
-![VRNN Structure](graph1.png?raw=true "VRNN Structure")
+![VRNN Structure](graph1.png?raw=true "VRNN Structure") 
 ![Global Structure](graph2.png?raw=true "Global Structure")