forked from clhne/lambda-tensorflow-benchmark
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path: benchmark.sh
More file actions
executable file
·161 lines (133 loc) · 3.49 KB
/
benchmark.sh
File metadata and controls
executable file
·161 lines (133 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
#
# Benchmark a set of CNN models with TensorFlow's tf_cnn_benchmarks and tee
# each run's output to a log file under a CPU/GPU-derived configuration name.
#
# Usage: benchmark.sh [GPU_INDEX] [ITERATIONS]
#   GPU_INDEX  - comma-separated list of GPU indices to use (default: 0)
#   ITERATIONS - repetitions per configuration (default: 10)

# Put shell options in 'set', not the shebang: '#!/bin/bash -e' is silently
# dropped when the script is invoked as 'bash benchmark.sh'.
set -eo pipefail

GPU_INDEX=${1:-0}
# Split the comma-separated GPU list into an array so we can count devices.
IFS=', ' read -r -a gpus <<< "$GPU_INDEX"
ITERATIONS=${2:-10}
MIN_NUM_GPU=${#gpus[@]}
MAX_NUM_GPU=$MIN_NUM_GPU
export CUDA_VISIBLE_DEVICES=$GPU_INDEX

SCRIPT_DIR="$(pwd)/benchmarks/scripts/tf_cnn_benchmarks"

# Pick a short CPU model token out of lscpu's "Model name:" line; the useful
# word sits at different positions depending on the vendor string, hence the
# fallback below.
CPU_NAME="$(lscpu | grep "Model name:" | sed -r 's/Model name:\s{1,}//g' | awk '{ print $4 }')"
if [ "$CPU_NAME" = "CPU" ]; then
  # CPU can show up at different locations
  CPU_NAME="$(lscpu | grep "Model name:" | sed -r 's/Model name:\s{1,}//g' | awk '{ print $3 }')"
fi

GPU_NAME="$(nvidia-smi -i 0 --query-gpu=gpu_name --format=csv,noheader)"
GPU_NAME="${GPU_NAME// /_}"   # spaces -> underscores so the name is path-safe
CONFIG_NAME="${CPU_NAME}-${GPU_NAME}"
echo "$CONFIG_NAME"

DATA_DIR="/home/${USER}/data/imagenet_mini"
LOG_DIR="$(pwd)/${CONFIG_NAME}.logs"

NUM_BATCHES=100

# Models to benchmark; each must have entries in BATCH_SIZES / DATASET_NAMES.
MODELS=(
  resnet50
  resnet152
  inception3
  inception4
  vgg16
  alexnet
  ssd300
)

VARIABLE_UPDATE=(
  replicated
  parameter_server
)

# syn = synthetic input; add "real" to also benchmark against DATA_DIR.
DATA_MODE=(
  syn
)

PRECISION=(
  fp32
  fp16
)

# Per-model batch size. NOTE: resnet101 has entries here and below but only
# runs if it is also added to MODELS.
declare -A BATCH_SIZES=(
  [resnet50]=64
  [resnet101]=64
  [resnet152]=32
  [inception3]=64
  [inception4]=16
  [vgg16]=64
  [alexnet]=512
  [ssd300]=32
)

declare -A DATASET_NAMES=(
  [resnet50]=imagenet
  [resnet101]=imagenet
  [resnet152]=imagenet
  [inception3]=imagenet
  [inception4]=imagenet
  [vgg16]=imagenet
  [alexnet]=imagenet
  [ssd300]=coco
)
run_benchmark() {
  #######################################
  # Run one tf_cnn_benchmarks invocation and tee its output to a log file
  # whose name encodes the full configuration.
  # Globals:   SCRIPT_DIR, LOG_DIR, NUM_BATCHES, DATA_DIR (read)
  # Arguments: $1 model, $2 batch_size, $3 config_name, $4 num_gpus,
  #            $5 iter, $6 data_mode, $7 update_mode, $8 distortions,
  #            $9 dataset_name, $10 precision
  #######################################
  local model="$1"
  local batch_size=$2
  local config_name=$3   # kept for interface compatibility; unused here
  local num_gpus=$4
  local iter=$5
  local data_mode=$6
  local update_mode=$7
  local distortions=$8
  local dataset_name=$9
  local precision="${10}"

  pushd "$SCRIPT_DIR" &> /dev/null

  local args=()
  # BUG FIX: the original read $variable_update, which is never bound in this
  # function and only worked through the caller's dynamic scope; use the $7
  # parameter bound above. The resulting log name is unchanged.
  local output="${LOG_DIR}/${model}-${data_mode}-${update_mode}-${precision}"
  args+=("--optimizer=sgd")
  args+=("--model=$model")
  args+=("--num_gpus=$num_gpus")
  args+=("--batch_size=$batch_size")
  args+=("--variable_update=$update_mode")
  args+=("--distortions=$distortions")
  args+=("--num_batches=$NUM_BATCHES")
  args+=("--data_name=$dataset_name")
  # args+=("--all_reduce_spec=nccl")

  # Only real data needs a data directory; synthetic mode generates input.
  if [ "$data_mode" = real ]; then
    args+=("--data_dir=$DATA_DIR")
  fi
  # Compare as a string instead of executing $distortions as a command.
  if [ "$distortions" = true ]; then
    output+="-distortions"
  fi
  if [ "$precision" = fp16 ]; then
    args+=("--use_fp16=True")
  fi

  output+="-${num_gpus}gpus-${batch_size}-${iter}.log"

  mkdir -p "${LOG_DIR}"
  echo "${args[@]}"
  python3 tf_cnn_benchmarks.py "${args[@]}" |& tee "$output"
  popd &> /dev/null
}
run_benchmark_all() {
  #######################################
  # Run every model in MODELS at every GPU count from MAX_NUM_GPU down to
  # MIN_NUM_GPU, repeating each configuration ITERATIONS times.
  # Globals:   MODELS, BATCH_SIZES, DATASET_NAMES, MAX_NUM_GPU, MIN_NUM_GPU,
  #            ITERATIONS, CONFIG_NAME (read)
  # Arguments: $1 data_mode, $2 variable_update, $3 distortions, $4 precision
  #######################################
  local data_mode="$1"
  local variable_update="$2"
  local distortions="$3"
  local precision="$4"
  local model num_gpu iter batch_size dataset_name

  for model in "${MODELS[@]}"; do
    batch_size=${BATCH_SIZES[$model]}
    dataset_name=${DATASET_NAMES[$model]}
    # C-style counters instead of backtick `seq`; counts GPU count downward.
    for (( num_gpu = MAX_NUM_GPU; num_gpu >= MIN_NUM_GPU; num_gpu-- )); do
      for (( iter = 1; iter <= ITERATIONS; iter++ )); do
        run_benchmark "$model" "$batch_size" "$CONFIG_NAME" "$num_gpu" "$iter" \
          "$data_mode" "$variable_update" "$distortions" "$dataset_name" "$precision"
      done
    done
  done
}
main() {
  # Full sweep: precision x data mode x variable-update strategy x
  # distortions. Distortion runs are skipped for synthetic data, where
  # image distortion has nothing to operate on.
  # (Unused locals from the original — distortion_mode, model, num_gpu,
  # iter, benchmark_name, cpu_line, table_line — have been removed.)
  local precision data_mode variable_update distortions

  for precision in "${PRECISION[@]}"; do
    for data_mode in "${DATA_MODE[@]}"; do
      for variable_update in "${VARIABLE_UPDATE[@]}"; do
        for distortions in true false; do
          # skip distortion for synthetic data
          if [ "$data_mode" = syn ] && [ "$distortions" = true ]; then
            continue
          fi
          run_benchmark_all "$data_mode" "$variable_update" "$distortions" "$precision"
        done
      done
    done
  done
}
main "$@"