Skip to content

Commit 60199d7

Browse files
authored
Merge pull request #11 from JorisCos/augmentation_fix
Augmentation fix
2 parents 3d44d6d + 7ae7adb commit 60199d7

2 files changed

Lines changed: 26 additions & 6 deletions

File tree

generate_librimix.sh

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,15 @@ wham &
6464

6565
wait
6666

67+
# Path to python
68+
python_path=python
69+
70+
# If you wish to rerun this script in the future, please comment this line out.
71+
$python_path scripts/augment_train_noise.py --wham_dir $wham_dir
6772

68-
python scripts/augment_train_noise.py --wham_dir $wham_dir
6973
for n_src in 2 3; do
7074
metadata_dir=metadata/Libri$n_src"Mix"
71-
python scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \
75+
$python_path scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \
7276
--wham_dir $wham_dir \
7377
--metadata_dir $metadata_dir \
7478
--librimix_outdir $librimix_outdir \

scripts/augment_train_noise.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,26 @@ def main(args):
1919
# List files in that dir
2020
sound_paths = glob.glob(os.path.join(subdir, '**/*.wav'),
2121
recursive=True)
22-
print(f'Augmenting {subdir} files')
23-
# Transform audio speed
24-
augment_noise(sound_paths, 0.8)
25-
augment_noise(sound_paths, 1.2)
22+
# Avoid running this script if it has already been run
23+
if len(sound_paths) == 60000:
24+
print("It appears that augmented files have already been generated.\n"
25+
"Skipping data augmentation.")
26+
return
27+
elif len(sound_paths) != 20000:
28+
print("It appears that augmented files have not been generated properly\n"
29+
"Resuming augmentation.")
30+
originals = [x for x in sound_paths if 'sp' not in x]
31+
to_be_removed_08 = [x.replace('sp08','') for x in sound_paths if 'sp08' in x]
32+
to_be_removed_12 = [x.replace('sp12','') for x in sound_paths if 'sp12' in x ]
33+
sound_paths_08 = list(set(originals) - set(to_be_removed_08))
34+
sound_paths_12 = list(set(originals) - set(to_be_removed_12))
35+
augment_noise(sound_paths_08, 0.8)
36+
augment_noise(sound_paths_12, 1.2)
37+
else:
38+
print(f'Augmenting {subdir} files')
39+
# Transform audio speed
40+
augment_noise(sound_paths, 0.8)
41+
augment_noise(sound_paths, 1.2)
2642

2743

2844
def augment_noise(sound_paths, speed):

0 commit comments

Comments
 (0)