Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ addons:
- gfortran-4.9
- liblapack-dev
- clang-3.8
- shellcheck

branches:
only:
Expand Down
12 changes: 8 additions & 4 deletions egs/wsj/s5/local/append_utterances.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
pad_silence=0.5
# End configuration section.

echo "$0 $@"
echo "$0 $*"

[ -f ./path.sh ] && . ./path.sh
. parse_options.sh || exit 1;
. utils/parse_options.sh || exit 1;

if [ $# -ne 2 ]; then
echo "Usage: $0 [options] <input-dir> <output-dir>"
Expand All @@ -27,8 +27,12 @@ for f in spk2gender spk2utt text utt2spk wav.scp; do
done

# Checks if sox is on the path.
sox=`which sox`
[ $? -ne 0 ] && "sox: command not found." && exit 1;
#sox=`which sox`
#[ $? -ne 0 ] && "sox: command not found." && exit 1;
if ! sox=`which sox`; then
echo "sox: command not found";
exit 1;
fi
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
[ ! -x $sph2pipe ] && "sph2pipe: command not found." && exit 1;

Expand Down
16 changes: 8 additions & 8 deletions egs/wsj/s5/local/chain/compare_wer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ set_names() {


echo -n "# System "
for x in $*; do printf "% 10s" " $(basename $x)"; done
for x in "$@"; do printf "% 10s" " $(basename $x)"; done
echo

strings=(
Expand All @@ -74,7 +74,7 @@ strings=(

for n in 0 1 2 3 4 5 6 7; do
echo -n "${strings[$n]}"
for x in $*; do
for x in "$@"; do
set_names $x # sets $dirname and $epoch_infix
decode_names=(tgpr_dev93 tg_dev93 bd_tgpr_dev93 bd_tgpr_dev93_fg tgpr_eval92 tg_eval92 bd_tgpr_eval92 bd_tgpr_eval92_fg)

Expand All @@ -84,7 +84,7 @@ for n in 0 1 2 3 4 5 6 7; do
echo
if $include_looped; then
echo -n "# [looped:] "
for x in $*; do
for x in "$@"; do
set_names $x # sets $dirname and $epoch_infix
wer=$(cat $dirname/decode_looped_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
Expand All @@ -93,7 +93,7 @@ for n in 0 1 2 3 4 5 6 7; do
fi
if $include_online; then
echo -n "# [online:] "
for x in $*; do
for x in "$@"; do
set_names $x # sets $dirname and $epoch_infix
wer=$(cat ${dirname}_online/decode_${decode_names[$n]}/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
Expand All @@ -109,28 +109,28 @@ fi


echo -n "# Final train prob "
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob "
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final train prob (xent)"
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob (xent)"
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ remove_egs=true
test_online_decoding=false # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging


. ./cmd.sh
Expand Down Expand Up @@ -219,7 +219,7 @@ fi
if [ $stage -le 16 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-"$(date +'%m_%d_%H_%M')"/s5_r2/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/chain/train.py --stage=$train_stage \
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/chain/tuning/run_cnn_tdnn_1b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ remove_egs=true
test_online_decoding=false # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging


. ./cmd.sh
Expand Down Expand Up @@ -222,7 +222,7 @@ fi
if [ $stage -le 16 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-"$(date +'%m_%d_%H_%M')"/s5_r2/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/chain/train.py --stage=$train_stage \
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/chain/tuning/run_tdnn_1a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ remove_egs=true
test_online_decoding=false # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging


. ./cmd.sh
Expand Down Expand Up @@ -226,7 +226,7 @@ fi
if [ $stage -le 16 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-"$(date +'%m_%d_%H_%M')"/s5/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/chain/train.py --stage=$train_stage \
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/chain/tuning/run_tdnn_1b.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ remove_egs=true
test_online_decoding=false # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging


. ./cmd.sh
Expand Down Expand Up @@ -201,7 +201,7 @@ fi
if [ $stage -le 16 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-"$(date +'%m_%d_%H_%M')"/s5/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/chain/train.py --stage=$train_stage \
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/chain/tuning/run_tdnn_lstm_1a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ remove_egs=true
test_online_decoding=false # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging


. ./cmd.sh
Expand Down Expand Up @@ -225,7 +225,7 @@ fi
if [ $stage -le 16 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-$(date +'%m_%d_%H_%M')/s5_r2/$dir/egs/storage $dir/egs/storage
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/tedlium-"$(date +'%m_%d_%H_%M')"/s5_r2/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/chain/train.py --stage=$train_stage \
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/cstr_wsj_data_prep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ set -e
# - Arnab Ghoshal, 29/05/12

if [ $# -ne 1 ]; then
printf "\nUSAGE: %s <corpus-directory>\n\n" `basename $0`
printf "\nUSAGE: %s <corpus-directory>\n\n" "`basename $0`"
echo "The argument should be a the top-level WSJ corpus directory."
echo "It is assumed that there will be a 'wsj0' and a 'wsj1' subdirectory"
echo "within the top-level corpus directory."
Expand Down Expand Up @@ -169,7 +169,7 @@ prune-lm --threshold=1e-7 $lmdir/lm_tg_5k.arpa.gz $lmdir/lm_tgpr_5k.arpa || exit
gzip -f $lmdir/lm_tgpr_5k.arpa || exit 1;


if [ ! -f wsj0-train-spkrinfo.txt ] || [ `cat wsj0-train-spkrinfo.txt | wc -l` -ne 134 ]; then
if [ ! -f wsj0-train-spkrinfo.txt ] || [ "`cat wsj0-train-spkrinfo.txt | wc -l`" -ne 134 ]; then
rm -f wsj0-train-spkrinfo.txt
wget https://catalog.ldc.upenn.edu/docs/LDC93S6A/wsj0-train-spkrinfo.txt \
|| ( echo "Getting wsj0-train-spkrinfo.txt from backup location" && \
Expand Down
8 changes: 4 additions & 4 deletions egs/wsj/s5/local/cstr_wsj_extend_dict.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

dict_suffix=

echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging
. utils/parse_options.sh || exit 1;

if [ $# -ne 1 ]; then
Expand Down Expand Up @@ -57,7 +57,7 @@ echo "Getting training data [this should take at least a few seconds; if not, th
# oov.counts below (before adding this rule).

touch $dir/cleaned.gz
if [ `du -m $dir/cleaned.gz | cut -f 1` -eq 73 ]; then
if [ "`du -m $dir/cleaned.gz | cut -f 1`" -eq 73 ]; then
echo "Not getting cleaned data in $dir/cleaned.gz again [already exists]";
else
gunzip -c $srcdir/lng_modl/lm_train/np_data/{87,88,89}/*.z \
Expand Down Expand Up @@ -126,7 +126,7 @@ reverse_dict.pl $dir/f/oovs > $dir/b/oovs
# that it finds.
for d in $dir/f $dir/b; do
(
cd $d
cd $d || exit 1;
cat dict | get_rules.pl 2>get_rules.log >rules
get_rule_hierarchy.pl rules >hierarchy
awk '{print $1}' dict | get_candidate_prons.pl rules dict | \
Expand Down Expand Up @@ -165,7 +165,7 @@ head $dir/oovlist.not_handled.counts
echo "Count of OOVs we handled is `awk '{x+=$1} END{print x}' $dir/oovlist.handled.counts`"
echo "Count of OOVs we couldn't handle is `awk '{x+=$1} END{print x}' $dir/oovlist.not_handled.counts`"
echo "Count of OOVs we didn't handle due to low count is" \
`awk -v thresh=$mincount '{if ($1 < thresh) x+=$1; } END{print x;}' $dir/oov.counts`
"`awk -v thresh=$mincount '{if ($1 < thresh) x+=$1; } END{print x;}' $dir/oov.counts`"
# The two files created above are for humans to look at, as diagnostics.

cat <<EOF | cat - $dir/dict.cmu $dir/dict.oovs_merged | sort | uniq > $dir/lexicon.txt
Expand Down
16 changes: 8 additions & 8 deletions egs/wsj/s5/local/nnet3/compare_wer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ set_names() {


echo -n "# System "
for x in $*; do printf "% 10s" " $(basename $x)"; done
for x in "$@"; do printf "% 10s" " $(basename $x)"; done
echo

strings=(
Expand All @@ -74,7 +74,7 @@ strings=(

for n in 0 1 2 3 4 5 6 7; do
echo -n "${strings[$n]}"
for x in $*; do
for x in "$@"; do
set_names $x # sets $dirname and $epoch_infix
decode_names=(tgpr_dev93 tg_dev93 bd_tgpr_dev93 bd_tgpr_dev93_fg tgpr_eval92 tg_eval92 bd_tgpr_eval92 bd_tgpr_eval92_fg)

Expand All @@ -84,7 +84,7 @@ for n in 0 1 2 3 4 5 6 7; do
echo
if $include_looped; then
echo -n "# [looped:] "
for x in $*; do
for x in "$@"; do
set_names $x # sets $dirname and $epoch_infix
wer=$(cat $dirname/decode_looped_${decode_names[$n]}$epoch_infix/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
Expand All @@ -93,7 +93,7 @@ for n in 0 1 2 3 4 5 6 7; do
fi
if $include_online; then
echo -n "# [online:] "
for x in $*; do
for x in "$@"; do
set_names $x # sets $dirname and $epoch_infix
wer=$(cat ${dirname}_online/decode_${decode_names[$n]}$epoch_infix/scoring_kaldi/best_wer | utils/best_wer.sh | awk '{print $2}')
printf "% 10s" $wer
Expand All @@ -109,28 +109,28 @@ fi


echo -n "# Final train prob "
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob "
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep log-like | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final train acc "
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_train.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid acc "
for x in $*; do
for x in "$@"; do
prob=$(grep Overall $x/log/compute_prob_valid.{final,combined}.log 2>/dev/null | grep accuracy | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
Expand Down
6 changes: 3 additions & 3 deletions egs/wsj/s5/local/nnet3/run_ivector_common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ if [ $stage -le 2 ]; then
# them overwrite each other.
mfccdir=data/${train_set}_sp_hires/data
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/wsj-"$(date +'%m_%d_%H_%M')"/s5/$mfccdir/storage $mfccdir/storage
fi

for datadir in ${train_set}_sp ${test_sets}; do
Expand All @@ -84,7 +84,7 @@ if [ $stage -le 3 ]; then

# train a diagonal UBM using a subset of about a quarter of the data
num_utts_total=$(wc -l <data/${train_set}_sp_hires/utt2spk)
num_utts=$[$num_utts_total/4]
num_utts=$((num_utts_total/4))
utils/data/subset_data_dir.sh data/${train_set}_sp_hires \
$num_utts ${temp_data_root}/${train_set}_sp_hires_subset

Expand Down Expand Up @@ -120,7 +120,7 @@ if [ $stage -le 5 ]; then
# valid for the non-'max2' data; the utterance list is the same.
ivectordir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $ivectordir/storage ]; then
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$ivectordir/storage $ivectordir/storage
utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/egs/wsj-"$(date +'%m_%d_%H_%M')"/s5/$ivectordir/storage $ivectordir/storage
fi

# We now extract iVectors on the speed-perturbed training data . With
Expand Down
4 changes: 2 additions & 2 deletions egs/wsj/s5/local/nnet3/run_lstm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ frames_per_chunk=

#End configuration section

echo "$0 $@" # Print the command line for logging
echo "$0 $*" # Print the command line for logging

. ./cmd.sh
. ./path.sh
Expand All @@ -70,7 +70,7 @@ local/nnet3/run_ivector_common.sh --stage $stage || exit 1;
if [ $stage -le 8 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-"$(date +'%m_%d_%H_%M')"/s5/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/lstm/train.sh --stage $train_stage \
Expand Down
6 changes: 3 additions & 3 deletions egs/wsj/s5/local/nnet3/run_lstm_discriminative.sh
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ fi
model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'`
model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'`

left_context=$[model_left_context + extra_left_context]
right_context=$[model_right_context + extra_right_context]
left_context=$((model_left_context + extra_left_context))
right_context=$((model_right_context + extra_right_context))

frame_subsampling_opt=
if [ -f $srcdir/frame_subsampling_factor ]; then
Expand All @@ -135,7 +135,7 @@ if [ -z "$degs_dir" ]; then
if [ $stage -le 3 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${srcdir}_degs/storage ]; then
utils/create_split_dir.pl \
/export/b0{1,2,12,13}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
/export/b0{1,2,12,13}/$USER/kaldi-data/egs/swbd-"$(date +'%m_%d_%H_%M')"/s5/${srcdir}_degs/storage ${srcdir}_degs/storage
fi
# have a higher maximum num-jobs if
if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi
Expand Down
Loading