JeremyCCHsu · zhengjunyue · Aug 11, 2019 · Aug 11, 2019 · Aug 11, 2019 · Aug 11, 2019
diff --git a/README.md b/README.md
@@ -11,14 +11,13 @@ See the note section for difference between this repo and the the paper.
   - Tensorflow-gpu 1.5.0
   - PyWorld
   - librosa
-  - soundfile
 <br/>
 
 
 ### Note:
-1. Be sure to use create a virtual environment (using `conda` or `virtualenv`)
+1. Be sure to create a virtual environment (using `conda` or `virtualenv`)
 2. If your Tensorflow is the CPU version, you might have to replace all the `NCHW` ops in my code because Tensorflow-CPU only supports `NHWC` op and will report an error: `InvalidArgumentError (see above for traceback): Conv2DCustomBackpropInputOp only supports NHWC.`
-3. `soundfile` might require `sudo apt-get install` some codecs.  
+
 <br/>
 <br/>
 
@@ -34,12 +33,12 @@ pip install -r requirements.txt
 
 # feature extraction
 python analyzer.py \
---dir_to_wav dataset/vcc2016/wav \
---dir_to_bin dataset/vcc2016/bin 
+--dir_to_wav /fastdata/ac1zy/data/vcc2016/wav \
+--dir_to_bin /fastdata/ac1zy/data/vcc2016/bin 
 
 # collect stats
 python build.py \
---train_file_pattern "dataset/vcc2016/bin/Training Set/*/*.bin" \
+--train_file_pattern "/fastdata/ac1zy/data/vcc2016/bin/training/*/*.bin" \
 --corpus_name vcc2016
 
 # training
@@ -53,7 +52,7 @@ python convert-vawgan.py \
 --module model.vawgan \
 --model VAWGAN \
---checkpoint logdir/train/[timestampe]/model.ckpt-[modelid] \ 
---file_pattern "./dataset/vcc2016/bin/Testing Set/{}/*.bin"
-# Just an example; Please fill in `timestampe` and `model id`.
+--checkpoint logdir/train/[timestamp]/model.ckpt-[modelid] \
+--file_pattern "/fastdata/ac1zy/data/vcc2016/bin/testing/{}/*.bin"
+# Just an example; please fill in `timestamp` and `modelid`.
 ```
 
@@ -62,9 +61,9 @@ Description:
 2. Run `analyzer.py` to extract features and write features into binary files. (This takes a few minutes.)  
 3. Run `build.py` to collect stats, such as spectral extrema and pitch.  
 4. To train a VAE or VAWGAN, for example, run  
-5. You can find your models in `./logdir/train/[timestamp]`  
+5. You can find your models in `/fastdata/ac1zy/data/vcc2016/logdir/train/[timestamp]`  
 6. To convert the voice, run  
-7. You can find the converted wav files in `./logdir/output/[timestamp]`  
+7. You can find the converted wav files in `/fastdata/ac1zy/data/vcc2016/logdir/output/[timestamp]`  
 8. The VAE in `model.vae` supports multiple speaker training while that in `model.vawgan` does not.
    The VAE can be trained and used with the following snippets:
 

diff --git a/analyzer.py b/analyzer.py
@@ -11,8 +11,8 @@
 
 
 args = tf.app.flags.FLAGS
-tf.app.flags.DEFINE_string('dir_to_wav', './dataset/vcc2016/wav', 'Dir to *.wav')
-tf.app.flags.DEFINE_string('dir_to_bin', './dataset/vcc2016/bin', 'Dir to output *.bin')
+tf.app.flags.DEFINE_string('dir_to_wav', '/fastdata/ac1zy/data/vcc2016/wav', 'Dir to *.wav')
+tf.app.flags.DEFINE_string('dir_to_bin', '/fastdata/ac1zy/data/vcc2016/bin', 'Dir to output *.bin')
 tf.app.flags.DEFINE_integer('fs', 16000, 'Global sampling frequency')
 tf.app.flags.DEFINE_float('f0_ceil', 500, 'Global f0 ceiling')
 

diff --git a/build.py b/build.py
@@ -1,6 +1,6 @@
 import numpy as np
 import pyworld as pw
-import soundfile as sf
+import librosa
 import tensorflow as tf
 from analyzer import pw2wav, read, read_whole_features
 
@@ -12,7 +12,7 @@
 )
 tf.app.flags.DEFINE_string(
     'train_file_pattern',
-    './dataset/vcc2016/bin/Training Set/*/*.bin',
+    '/fastdata/ac1zy/data/vcc2016/bin/training/*/*.bin',
     'training dir (to *.bin)'
 )
 
@@ -76,7 +76,7 @@ def main():
 
 def test():
     # ==== Test: batch mixer (conclusion: capacity should be larger to make sure good mixing) ====
-    x, y = read('./dataset/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048)
+    x, y = read('/fastdata/ac1zy/data/vcc2016/bin/*/*/1*001.bin', 32, min_after_dequeue=1024, capacity=2048)
     sv = tf.train.Supervisor()
     with sv.managed_session() as sess:
         for _ in range(200):
@@ -85,23 +85,23 @@ def test():
 
 
     # ===== Read binary ====
-    features = read_whole_features('./dataset/vcc2016/bin/Training Set/SF1/*001.bin')
+    features = read_whole_features('/fastdata/ac1zy/data/vcc2016/bin/training/SF1/*001.bin')
 
     sv = tf.train.Supervisor()
     with sv.managed_session() as sess:
         features = sess.run(features)
 
     y = pw2wav(features)
-    sf.write('test1.wav', y, 16000)  # TODO fs should be specified externally.
+    librosa.output.write_wav('test1.wav', y, 16000)  # TODO fs should be specified externally.
 
 
     # ==== Direct read =====
-    f = './dataset/vcc2016/bin/Training Set/SF1/100001.bin'
+    f = '/fastdata/ac1zy/data/vcc2016/bin/training/SF1/100001.bin'
     features = np.fromfile(f, np.float32)
     features = np.reshape(features, [-1, 513*2 + 1 + 1 + 1]) # f0, en, spk
 
     y = pw2wav(features)
-    sf.write('test2.wav', y, 16000)
+    librosa.output.write_wav('test2.wav', y, 16000)
 
 
 if __name__ == '__main__':

diff --git a/util/wrapper.py b/util/wrapper.py
@@ -114,7 +114,7 @@ def get_default_logdir(logdir_root):
             'You can only specify either --logdir or --logdir_root')
 
     if args.logdir_root is None:
-        logdir_root = 'logdir'
+        logdir_root = '/fastdata/ac1zy/data/vcc2016/logdir'
 
     if args.logdir is None:
         logdir = get_default_logdir(logdir_root)