From 3c5ec54a081ba011eedf83fd8fda978857086302 Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:29:25 +0100
Subject: [PATCH 1/7] Update README.md
---
README.md | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index f0f09da..610c34e 100644
--- a/README.md
+++ b/README.md
@@ -34,12 +34,12 @@ pip install -r requirements.txt
# feature extraction
python analyzer.py \
---dir_to_wav dataset/vcc2016/wav \
---dir_to_bin dataset/vcc2016/bin
+--dir_to_wav /fastdata/ac1zy/data/vcc2016/wav \
+--dir_to_bin /fastdata/ac1zy/data/vcc2016/bin
# collect stats
python build.py \
---train_file_pattern "dataset/vcc2016/bin/Training Set/*/*.bin" \
+--train_file_pattern "/fastdata/ac1zy/data/vcc2016/bin/training/*/*.bin" \
--corpus_name vcc2016
# training
@@ -53,7 +53,7 @@ python convert-vawgan.py \
--module model.vawgan \
--model VAWGAN \
--checkpoint logdir/train/[timestampe]/model.ckpt-[modelid] \
---file_pattern "./dataset/vcc2016/bin/Testing Set/{}/*.bin"
+--file_pattern "/fastdata/ac1zy/data/vcc2016/bin/testing/{}/*.bin"
# Just an example; Please fill in `timestampe` and `model id`.
```
@@ -62,9 +62,9 @@ Description:
2. Run `analyzer.py` to extract features and write features into binary files. (This takes a few minutes.)
3. Run `build.py` to collect stats, such as spectral extrema and pitch.
4. To train a VAE or VAWGAN, for example, run
-5. You can find your models in `./logdir/train/[timestamp]`
+5. You can find your models in `/fastdata/ac1zy/data/vcc2016/logdir/train/[timestamp]`
6. To convert the voice, run
-7. You can find the converted wav files in `./logdir/output/[timestamp]`
+7. You can find the converted wav files in `/fastdata/ac1zy/data/vcc2016/logdir/output/[timestamp]`
8. The VAE in `model.vae` supports multiple speaker training while that in `model.vawgan` does not.
The VAE can be trained and used with the following snippets:
From cfa2705c9e476b38647097cff89954a88fc369eb Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:38:37 +0100
Subject: [PATCH 2/7] Update README.md
---
README.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 610c34e..2673351 100644
--- a/README.md
+++ b/README.md
@@ -11,14 +11,13 @@ See the note section for difference between this repo and the the paper.
- Tensorflow-gpu 1.5.0
- PyWorld
- librosa
- - soundfile
### Note:
1. Be sure to use create a virtual environment (using `conda` or `virtualenv`)
2. If your Tensorflow is the CPU version, you might have to replace all the `NCHW` ops in my code because Tensorflow-CPU only supports `NHWC` op and will report an error: `InvalidArgumentError (see above for traceback): Conv2DCustomBackpropInputOp only supports NHWC.`
-3. `soundfile` might require `sudo apt-get install` some codecs.
+
From 70b8ba6aac37ba66e586de52e2d47082a262b484 Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:40:31 +0100
Subject: [PATCH 3/7] Update analyzer.py
---
analyzer.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/analyzer.py b/analyzer.py
index 1e36ad5..d91c396 100644
--- a/analyzer.py
+++ b/analyzer.py
@@ -11,8 +11,8 @@
args = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_string('dir_to_wav', './dataset/vcc2016/wav', 'Dir to *.wav')
-tf.app.flags.DEFINE_string('dir_to_bin', './dataset/vcc2016/bin', 'Dir to output *.bin')
+tf.app.flags.DEFINE_string('dir_to_wav', '/fastdata/ac1zy/data/vcc2016/wav', 'Dir to *.wav')
+tf.app.flags.DEFINE_string('dir_to_bin', '/fastdata/ac1zy/data/vcc2016/bin', 'Dir to output *.bin')
tf.app.flags.DEFINE_integer('fs', 16000, 'Global sampling frequency')
tf.app.flags.DEFINE_float('f0_ceil', 500, 'Global f0 ceiling')
From 4220a864d93d97146b27b553af2c160d6eb9bd0a Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:41:54 +0100
Subject: [PATCH 4/7] Update wrapper.py
---
util/wrapper.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/util/wrapper.py b/util/wrapper.py
index 215c663..b64dfff 100644
--- a/util/wrapper.py
+++ b/util/wrapper.py
@@ -114,7 +114,7 @@ def get_default_logdir(logdir_root):
'You can only specify either --logdir or --logdir_root')
if args.logdir_root is None:
- logdir_root = 'logdir'
+ logdir_root = '/fastdata/ac1zy/data/vcc2016/logdir'
if args.logdir is None:
logdir = get_default_logdir(logdir_root)
From 345fbffad8c6d85550bdc0490edcc2411860ecb5 Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:44:28 +0100
Subject: [PATCH 5/7] Update build.py
---
build.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/build.py b/build.py
index 5b4deb6..ef9fdc4 100644
--- a/build.py
+++ b/build.py
@@ -12,7 +12,7 @@
)
tf.app.flags.DEFINE_string(
'train_file_pattern',
- './dataset/vcc2016/bin/Training Set/*/*.bin',
+ '/fastdata/ac1zy/data/vcc2016/bin/training/*/*.bin',
'training dir (to *.bin)'
)
@@ -76,7 +76,7 @@ def main():
def test():
# ==== Test: batch mixer (conclusion: capacity should be larger to make sure good mixing) ====
- x, y = read('./dataset/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048)
+ x, y = read('/fastdata/ac1zy/data/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048)
sv = tf.train.Supervisor()
with sv.managed_session() as sess:
for _ in range(200):
@@ -85,7 +85,7 @@ def test():
# ===== Read binary ====
- features = read_whole_features('./dataset/vcc2016/bin/Training Set/SF1/*001.bin')
+ features = read_whole_features('/fastdata/ac1zy/data/vcc2016/bin/training/SF1/*001.bin')
sv = tf.train.Supervisor()
with sv.managed_session() as sess:
@@ -96,7 +96,7 @@ def test():
# ==== Direct read =====
- f = './dataset/vcc2016/bin/Training Set/SF1/100001.bin'
+ f = '/fastdata/ac1zy/data/vcc2016/bin/training/SF1/100001.bin'
features = np.fromfile(f, np.float32)
features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1]) # f0, en, spk
From a5eb703efab09c81406d53d114ed2a1e024b8835 Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:45:34 +0100
Subject: [PATCH 6/7] Update build.py
---
build.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/build.py b/build.py
index ef9fdc4..b05e99b 100644
--- a/build.py
+++ b/build.py
@@ -92,7 +92,7 @@ def test():
features = sess.run(features)
y = pw2wav(features)
- sf.write('test1.wav', y, 16000) # TODO fs should be specified externally.
+ librosa.output.write_wav('test1.wav', y, 16000) # TODO fs should be specified externally.
# ==== Direct read =====
@@ -101,7 +101,7 @@ def test():
features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1]) # f0, en, spk
y = pw2wav(features)
- sf.write('test2.wav', y, 16000)
+ librosa.output.write_wav('test2.wav', y, 16000)
if __name__ == '__main__':
From 7d0dedd74ced0ebaf2cc7074e400692f171bc425 Mon Sep 17 00:00:00 2001
From: junjunjiang <45668655+zhengjunyue@users.noreply.github.com>
Date: Sun, 11 Aug 2019 01:46:02 +0100
Subject: [PATCH 7/7] Update build.py
---
build.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/build.py b/build.py
index b05e99b..5e9d534 100644
--- a/build.py
+++ b/build.py
@@ -1,6 +1,6 @@
import numpy as np
import pyworld as pw
-import soundfile as sf
+import librosa
import tensorflow as tf
from analyzer import pw2wav, read, read_whole_features