forked from clhne/lambda-tensorflow-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: benchmark.sh
More file actions
executable file
·161 lines (133 loc) · 3.49 KB
/
benchmark.sh
File metadata and controls
executable file
·161 lines (133 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
#
# Benchmark a set of CNN models with TensorFlow's tf_cnn_benchmarks and tee
# each run's output to a log file under a CPU/GPU-derived configuration name.
#
# Usage: benchmark.sh [GPU_INDEX] [ITERATIONS]
#   GPU_INDEX  - comma-separated list of GPU indices to use (default: 0)
#   ITERATIONS - repetitions per configuration (default: 10)

# Put shell options in 'set', not the shebang: '#!/bin/bash -e' is silently
# dropped when the script is invoked as 'bash benchmark.sh'.
set -eo pipefail

GPU_INDEX=${1:-0}
# Split the comma-separated GPU list into an array so we can count devices.
IFS=', ' read -r -a gpus <<< "$GPU_INDEX"
ITERATIONS=${2:-10}
MIN_NUM_GPU=${#gpus[@]}
MAX_NUM_GPU=$MIN_NUM_GPU
export CUDA_VISIBLE_DEVICES=$GPU_INDEX

SCRIPT_DIR="$(pwd)/benchmarks/scripts/tf_cnn_benchmarks"

# Pick a short CPU model token out of lscpu's "Model name:" line; the useful
# word sits at different positions depending on the vendor string, hence the
# fallback below.
CPU_NAME="$(lscpu | grep "Model name:" | sed -r 's/Model name:\s{1,}//g' | awk '{ print $4 }')"
if [ "$CPU_NAME" = "CPU" ]; then
  # CPU can show up at different locations
  CPU_NAME="$(lscpu | grep "Model name:" | sed -r 's/Model name:\s{1,}//g' | awk '{ print $3 }')"
fi

GPU_NAME="$(nvidia-smi -i 0 --query-gpu=gpu_name --format=csv,noheader)"
GPU_NAME="${GPU_NAME// /_}"   # spaces -> underscores so the name is path-safe
CONFIG_NAME="${CPU_NAME}-${GPU_NAME}"
echo "$CONFIG_NAME"

DATA_DIR="/home/${USER}/data/imagenet_mini"
LOG_DIR="$(pwd)/${CONFIG_NAME}.logs"

NUM_BATCHES=100

# Models to benchmark; each must have entries in BATCH_SIZES / DATASET_NAMES.
MODELS=(
  resnet50
  resnet152
  inception3
  inception4
  vgg16
  alexnet
  ssd300
)

VARIABLE_UPDATE=(
  replicated
  parameter_server
)

# syn = synthetic input; add "real" to also benchmark against DATA_DIR.
DATA_MODE=(
  syn
)

PRECISION=(
  fp32
  fp16
)

# Per-model batch size. NOTE: resnet101 has entries here and below but only
# runs if it is also added to MODELS.
declare -A BATCH_SIZES=(
  [resnet50]=64
  [resnet101]=64
  [resnet152]=32
  [inception3]=64
  [inception4]=16
  [vgg16]=64
  [alexnet]=512
  [ssd300]=32
)

declare -A DATASET_NAMES=(
  [resnet50]=imagenet
  [resnet101]=imagenet
  [resnet152]=imagenet
  [inception3]=imagenet
  [inception4]=imagenet
  [vgg16]=imagenet
  [alexnet]=imagenet
  [ssd300]=coco
)
run_benchmark() {
  #######################################
  # Run one tf_cnn_benchmarks invocation and tee its output to a log file
  # whose name encodes the full configuration.
  # Globals:   SCRIPT_DIR, LOG_DIR, NUM_BATCHES, DATA_DIR (read)
  # Arguments: $1 model, $2 batch_size, $3 config_name, $4 num_gpus,
  #            $5 iter, $6 data_mode, $7 update_mode, $8 distortions,
  #            $9 dataset_name, $10 precision
  #######################################
  local model="$1"
  local batch_size=$2
  local config_name=$3   # kept for interface compatibility; unused here
  local num_gpus=$4
  local iter=$5
  local data_mode=$6
  local update_mode=$7
  local distortions=$8
  local dataset_name=$9
  local precision="${10}"

  pushd "$SCRIPT_DIR" &> /dev/null

  local args=()
  # BUG FIX: the original read $variable_update, which is never bound in this
  # function and only worked through the caller's dynamic scope; use the $7
  # parameter bound above. The resulting log name is unchanged.
  local output="${LOG_DIR}/${model}-${data_mode}-${update_mode}-${precision}"
  args+=("--optimizer=sgd")
  args+=("--model=$model")
  args+=("--num_gpus=$num_gpus")
  args+=("--batch_size=$batch_size")
  args+=("--variable_update=$update_mode")
  args+=("--distortions=$distortions")
  args+=("--num_batches=$NUM_BATCHES")
  args+=("--data_name=$dataset_name")
  # args+=("--all_reduce_spec=nccl")

  # Only real data needs a data directory; synthetic mode generates input.
  if [ "$data_mode" = real ]; then
    args+=("--data_dir=$DATA_DIR")
  fi
  # Compare as a string instead of executing $distortions as a command.
  if [ "$distortions" = true ]; then
    output+="-distortions"
  fi
  if [ "$precision" = fp16 ]; then
    args+=("--use_fp16=True")
  fi

  output+="-${num_gpus}gpus-${batch_size}-${iter}.log"

  mkdir -p "${LOG_DIR}"
  echo "${args[@]}"
  python3 tf_cnn_benchmarks.py "${args[@]}" |& tee "$output"
  popd &> /dev/null
}
run_benchmark_all() {
  #######################################
  # Run every model in MODELS at every GPU count from MAX_NUM_GPU down to
  # MIN_NUM_GPU, repeating each configuration ITERATIONS times.
  # Globals:   MODELS, BATCH_SIZES, DATASET_NAMES, MAX_NUM_GPU, MIN_NUM_GPU,
  #            ITERATIONS, CONFIG_NAME (read)
  # Arguments: $1 data_mode, $2 variable_update, $3 distortions, $4 precision
  #######################################
  local data_mode="$1"
  local variable_update="$2"
  local distortions="$3"
  local precision="$4"
  local model num_gpu iter batch_size dataset_name

  for model in "${MODELS[@]}"; do
    batch_size=${BATCH_SIZES[$model]}
    dataset_name=${DATASET_NAMES[$model]}
    # C-style counters instead of backtick `seq`; counts GPU count downward.
    for (( num_gpu = MAX_NUM_GPU; num_gpu >= MIN_NUM_GPU; num_gpu-- )); do
      for (( iter = 1; iter <= ITERATIONS; iter++ )); do
        run_benchmark "$model" "$batch_size" "$CONFIG_NAME" "$num_gpu" "$iter" \
          "$data_mode" "$variable_update" "$distortions" "$dataset_name" "$precision"
      done
    done
  done
}
main() {
  # Full sweep: precision x data mode x variable-update strategy x
  # distortions. Distortion runs are skipped for synthetic data, where
  # image distortion has nothing to operate on.
  # (Unused locals from the original — distortion_mode, model, num_gpu,
  # iter, benchmark_name, cpu_line, table_line — have been removed.)
  local precision data_mode variable_update distortions

  for precision in "${PRECISION[@]}"; do
    for data_mode in "${DATA_MODE[@]}"; do
      for variable_update in "${VARIABLE_UPDATE[@]}"; do
        for distortions in true false; do
          # skip distortion for synthetic data
          if [ "$data_mode" = syn ] && [ "$distortions" = true ]; then
            continue
          fi
          run_benchmark_all "$data_mode" "$variable_update" "$distortions" "$precision"
        done
      done
    done
  done
}
main "$@"