From 3c5ec54a081ba011eedf83fd8fda978857086302 Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:29:25 +0100 Subject: [PATCH 1/7] Update README.md --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f0f09da..610c34e 100644 --- a/README.md +++ b/README.md @@ -34,12 +34,12 @@ pip install -r requirements.txt # feature extraction python analyzer.py \ ---dir_to_wav dataset/vcc2016/wav \ ---dir_to_bin dataset/vcc2016/bin +--dir_to_wav /fastdata/ac1zy/data/vcc2016/wav \ +--dir_to_bin /fastdata/ac1zy/data/vcc2016/bin # collect stats python build.py \ ---train_file_pattern "dataset/vcc2016/bin/Training Set/*/*.bin" \ +--train_file_pattern "/fastdata/ac1zy/data/vcc2016/bin/training/*/*.bin" \ --corpus_name vcc2016 # training @@ -53,7 +53,7 @@ python convert-vawgan.py \ --module model.vawgan \ --model VAWGAN \ --checkpoint logdir/train/[timestampe]/model.ckpt-[modelid] \ ---file_pattern "./dataset/vcc2016/bin/Testing Set/{}/*.bin" +--file_pattern "/fastdata/ac1zy/data/vcc2016/bin/testing/{}/*.bin" # Just an example; Please fill in `timestampe` and `model id`. ``` @@ -62,9 +62,9 @@ Description: 2. Run `analyzer.py` to extract features and write features into binary files. (This takes a few minutes.) 3. Run `build.py` to collect stats, such as spectral extrema and pitch. 4. To train a VAE or VAWGAN, for example, run -5. You can find your models in `./logdir/train/[timestamp]` +5. You can find your models in `/fastdata/ac1zy/data/vcc2016/logdir/train/[timestamp]` 6. To convert the voice, run -7. You can find the converted wav files in `./logdir/output/[timestamp]` +7. You can find the converted wav files in `/fastdata/ac1zy/data/vcc2016/logdir/output/[timestamp]` 8. The VAE in `model.vae` supports multiple speaker training while that in `model.vawgan` does not. The VAE can be trained and used with the following snippets: From cfa2705c9e476b38647097cff89954a88fc369eb Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:38:37 +0100 Subject: [PATCH 2/7] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 610c34e..2673351 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,13 @@ See the note section for difference between this repo and the the paper. - Tensorflow-gpu 1.5.0 - PyWorld - librosa - - soundfile
### Note: 1. Be sure to use create a virtual environment (using `conda` or `virtualenv`) 2. If your Tensorflow is the CPU version, you might have to replace all the `NCHW` ops in my code because Tensorflow-CPU only supports `NHWC` op and will report an error: `InvalidArgumentError (see above for traceback): Conv2DCustomBackpropInputOp only supports NHWC.` -3. `soundfile` might require `sudo apt-get install` some codecs. +

From 70b8ba6aac37ba66e586de52e2d47082a262b484 Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:40:31 +0100 Subject: [PATCH 3/7] Update analyzer.py --- analyzer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analyzer.py b/analyzer.py index 1e36ad5..d91c396 100644 --- a/analyzer.py +++ b/analyzer.py @@ -11,8 +11,8 @@ args = tf.app.flags.FLAGS -tf.app.flags.DEFINE_string('dir_to_wav', './dataset/vcc2016/wav', 'Dir to *.wav') -tf.app.flags.DEFINE_string('dir_to_bin', './dataset/vcc2016/bin', 'Dir to output *.bin') +tf.app.flags.DEFINE_string('dir_to_wav', '/fastdata/ac1zy/data/vcc2016/wav', 'Dir to *.wav') +tf.app.flags.DEFINE_string('dir_to_bin', '/fastdata/ac1zy/data/vcc2016/bin', 'Dir to output *.bin') tf.app.flags.DEFINE_integer('fs', 16000, 'Global sampling frequency') tf.app.flags.DEFINE_float('f0_ceil', 500, 'Global f0 ceiling') From 4220a864d93d97146b27b553af2c160d6eb9bd0a Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:41:54 +0100 Subject: [PATCH 4/7] Update wrapper.py --- util/wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/wrapper.py b/util/wrapper.py index 215c663..b64dfff 100644 --- a/util/wrapper.py +++ b/util/wrapper.py @@ -114,7 +114,7 @@ def get_default_logdir(logdir_root): 'You can only specify either --logdir or --logdir_root') if args.logdir_root is None: - logdir_root = 'logdir' + logdir_root = '/fastdata/ac1zy/data/vcc2016/logdir' if args.logdir is None: logdir = get_default_logdir(logdir_root) From 345fbffad8c6d85550bdc0490edcc2411860ecb5 Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:44:28 +0100 Subject: [PATCH 5/7] Update build.py --- build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/build.py b/build.py index 5b4deb6..ef9fdc4 100644 --- a/build.py +++ b/build.py @@ -12,7 +12,7 @@ ) tf.app.flags.DEFINE_string( 'train_file_pattern', - './dataset/vcc2016/bin/Training Set/*/*.bin', + '/fastdata/ac1zy/data/vcc2016/bin/training/*/*.bin', 'training dir (to *.bin)' ) @@ -76,7 +76,7 @@ def main(): def test(): # ==== Test: batch mixer (conclusion: capacity should be larger to make sure good mixing) ==== - x, y = read('./dataset/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048) + x, y = read('/fastdata/ac1zy/data/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048) sv = tf.train.Supervisor() with sv.managed_session() as sess: for _ in range(200): @@ -85,7 +85,7 @@ def test(): # ===== Read binary ==== - features = read_whole_features('./dataset/vcc2016/bin/Training Set/SF1/*001.bin') + features = read_whole_features('/fastdata/ac1zy/data/vcc2016/bin/training/SF1/*001.bin') sv = tf.train.Supervisor() with sv.managed_session() as sess: @@ -96,7 +96,7 @@ def test(): # ==== Direct read ===== - f = './dataset/vcc2016/bin/Training Set/SF1/100001.bin' + f = '/fastdata/ac1zy/data/vcc2016/bin/training/SF1/100001.bin' features = np.fromfile(f, np.float32) features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1]) # f0, en, spk From a5eb703efab09c81406d53d114ed2a1e024b8835 Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:45:34 +0100 Subject: [PATCH 6/7] Update build.py --- build.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build.py b/build.py index ef9fdc4..b05e99b 100644 --- a/build.py +++ b/build.py @@ -92,7 +92,7 @@ def test(): features = sess.run(features) y = pw2wav(features) - sf.write('test1.wav', y, 16000) # TODO fs should be specified externally. + librosa.output.write_wav('test1.wav', y, 16000) # TODO fs should be specified externally. # ==== Direct read ===== @@ -101,7 +101,7 @@ def test(): features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1]) # f0, en, spk y = pw2wav(features) - sf.write('test2.wav', y, 16000) + librosa.output.write_wav('test2.wav', y, 16000) if __name__ == '__main__': From 7d0dedd74ced0ebaf2cc7074e400692f171bc425 Mon Sep 17 00:00:00 2001 From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com> Date: Sun, 11 Aug 2019 01:46:02 +0100 Subject: [PATCH 7/7] Update build.py --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index b05e99b..5e9d534 100644 --- a/build.py +++ b/build.py @@ -1,6 +1,6 @@ import numpy as np import pyworld as pw -import soundfile as sf +import librosa import tensorflow as tf from analyzer import pw2wav, read, read_whole_features