hfingler_notes
##
#
# These notes might not be up to date, especially the ones closer to the end
#
##
export SELF_IP=172.31.76.36
task run-svgpu-manager-opt -- 1 --keepworkeralive yes --precreated_workers 1
High load:
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --precreated_workers 1 --scheduler worstfit
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --precreated_workers 2 --scheduler worstfit
Low load:
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --precreated_workers 1 --scheduler worstfit
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --precreated_workers 2 --scheduler worstfit
task run-svgpu-manager-opt -- 3 --keepworkeralive yes --precreated_workers 1 --scheduler worstfit
task run-svgpu-manager-opt -- 3 --keepworkeralive yes --precreated_workers 2 --scheduler worstfit
task run-svgpu-manager-opt-debug -- 2 --keepworkeralive yes
task run-svgpu-manager-opt-debug -- 2 --keepworkeralive yes --allctx yes --reporting yes --debug_migration 10 2>&1 | tee out.txt
task run-svgpu-manager-opt-debug -- 2 --ttc_addr "127.0.0.1"
task run-svgpu-manager-opt -- 2 --keepworkeralive yes --precreated_workers 1 --ttc_addr 127.0.0.1
task run-svgpu-manager-opt -- 2 --keepworkeralive yes --precreated_workers 2 --allctx yes --reporting yes --debug_migration 3 --ttc_addr 127.0.0.1
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --migration_strat 0 --precreated_workers 1
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --migration_strat 0 --precreated_workers 2 --scheduler worstfit
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --migration_strat 1 --precreated_workers 2 --scheduler worstfit --allctx yes --reporting yes
task run-svgpu-manager-opt -- 4 --keepworkeralive yes --migration_strat 0 --precreated_workers 1 --allctx yes --reporting yes --debug_migration 100
**************************************************************************************************************************************
Compilation:
task build-ava-release
task build-ava-debug
As soon as the container is up:
python3 -m pip install libs/onnxruntime_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
python3 -m pip install libs/tensorflow-1.14.1-cp36-cp36m-linux_x86_64.whl
python3 -m pip install -r src/apps/covidct/requirements.txt
For mlperf apps:
task download-onnxruntime
sudo cp -r src/apps/mlperf/cuda_dumps/onnxruntime/* /cuda_dumps
task run-svgpu-manager-opt-debug -- 2 --keepworkeralive yes
task run-svgpu-manager-opt -- 2 --keepworkeralive yes
apps:
task apps:run-bert-onnxruntime-ava
task apps:face-id-test-ava
task apps:run-resnet50-onnxruntime-ava
task apps:face-det-test-ava
task covidct:run-covidct-in-mem-ava
task apps:run-kmeans-ava
native:
task apps:run-bert-onnxruntime
task apps:face-id-test
task apps:run-resnet50-onnxruntime
task apps:face-det-test
task covidct:run-covidct-in-mem
task apps:run-kmeans
For kmeans class:
-p enables Poisson arrivals
svless:
*** RELEASE OR DEBUG? ***
task BUILD=release serverless:launch-gpu-server
task BUILD=release serverless:launch-gpu-server -- --allctx yes --reporting yes --debug_migration 1000 2>&1 | tee out.txt
./launch_faas_kmeans.py -i inputs/1000000p-10d.txt -k kmeans -d 10 -p
host:
./concurrent_baseline.py -i inputs/1000000p-10d.txt -d 10 -g 4 -p
./single_kmeans_test.py -i inputs/1000000p-10d.txt -d 10 -n 1
Requirements for building app images:
task serverless:build-basic-linux-image (once, to create the ubuntu image, takes a long time)
task serverless:build-base-linux -f
*or*
task BUILD=release serverless:build-base-linux -f
# onnx might require pip upgrade
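# if so:
python3 -m pip install --upgrade pip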
python3 -m pip install transformers pydicom pycocotools opencv-python-headless onnx
Launch backend:
task serverless:launch-resmngr
task serverless:launch-fn-server
task BUILD=debug serverless:launch-gpu-server -- --keepworkeralive yes
Test faas images:
task build-faas-face-det
task serverless:deploy-function -- faas_face_det --gpumem 32 --n 1
task serverless:invoke-faas-face-det -- --count 32 --batchsize 16
task build-faas-face-id
task serverless:deploy-function -- faas_face_id --gpumem 32 --n 1
task serverless:invoke-faas-face-id -- --count 32
task build-faas-classification-detection
task serverless:deploy-function -- faas_classification_detection --gpumem 32 --n 1
task serverless:invoke-faas-resnet50 -- --count 512
task build-faas-bert
task serverless:deploy-function -- faas_bert --mem 4000 --gpumem 32 --n 1
task serverless:invoke-faas-bert -- --count 32
task build-faas-covidct
task serverless:deploy-function -- faas_covidct --gpumem 32 --n 1
task serverless:invoke-faas-covidct
AVA_WORKER_DUMP_DIR=/disk/hfingler/serverless-gpus/src/apps/malloc/cuda_dumps \
AVA_GUEST_DUMP_DIR=/disk/hfingler/serverless-gpus/src/apps/malloc/cuda_dumps \
LD_LIBRARY_PATH=/disk/hfingler/serverless-gpus/build/ava/release/onnx_opt/lib \
AVA_CONFIG_FILE_PATH=/disk/hfingler/serverless-gpus/tools/ava.conf \
./migration_bench 8
# Running on CPU:
## use the no-GPU container:
run-container-host
## install libs
python3 -m pip install onnxruntime==1.8.0 tensorflow==1.14 boto3
python3 -m pip install -r src/apps/covidct/requirements.txt
RUN_CPU=1 RUN_NATIVE=1 python3 handler.py
in src/apps/kmeans:
./kmeans-pthreads -i /disk/hfingler/serverless-gpus/src/apps/kmeans/inputs/1m_16d_16c.txt -c 6
# Native:
python3 -m pip install onnxruntime-gpu==1.4
**************************************************************************************************************************************
Microbench commands (OLD):
task run-svgpu-manager-opt-debug -- 2 --keepworkeralive yes --allctx yes --reporting yes --debug_migration 1
# once to create the worker
AVA_CONFIG_FILE_PATH=tools/ava.conf LD_LIBRARY_PATH=build/ava/debug/onnx_opt/lib ./src/apps/malloc/migration_bench 16
# set the port to the correct one for the created worker
./tools/benchmarking/repeat_cmd.py 10 AVA_WORKER_ADDR=127.0.0.1:5300 AVA_CONFIG_FILE_PATH=tools/ava.conf LD_LIBRARY_PATH=build/ava/debug/onnx_opt/lib ./src/apps/malloc/migration_bench 16
**************************************************************************************************************************************
GPU server from scratch:
sudo apt install -y gcc zsh tmux build-essential pkg-config awscli unzip
sudo apt install linux-headers-generic
sudo apt install python3-pip
#wget https://us.download.nvidia.com/tesla/470.82.01/NVIDIA-Linux-x86_64-470.82.01.run
#sudo sh NVIDIA-Linux-x86_64-470.82.01.run
nvidia-smi
curl https://get.docker.com | sh && sudo systemctl --now enable docker
sudo usermod -aG docker $USER
# log out, log in
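# (or, to pick up the new group without re-logging, this should work:)
newgrp docker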
docker run hello-world
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - \
&& curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update
sudo apt-get install -y nvidia-docker2
sudo systemctl restart docker
sudo docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi
sudo nvidia-persistenced
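# -ac pins application clocks as <memory,graphics> MHz; 877,1530 should match a V100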
sudo nvidia-smi -ac 877,1530
wget https://go.dev/dl/go1.17.5.linux-amd64.tar.gz
rm -rf /usr/local/go && sudo tar -C /usr/local -xzf go1.17.5.linux-amd64.tar.gz
# add export PATH=$PATH:/usr/local/go/bin to .zshrc
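# e.g.:
echo 'export PATH=$PATH:/usr/local/go/bin' >> ~/.zshrc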
go install github.com/go-task/task/v3/cmd/task@latest
# add ~/go/bin to PATH
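# e.g.:
echo 'export PATH=$PATH:$HOME/go/bin' >> ~/.zshrc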
git clone git@github.com:hfingler/serverless-gpus.git
cd serverless-gpus
git submodule update --init --recursive
rm -f tools/containers/ava/.Dockerfile.ava-cuda-10.2.stamp
task build-ava-container102
task run-container102
task download-onnxruntime
task download-tensorflow-cudart-dynam
python3 -m pip install build/onnxruntime/prebuilt/onnxruntime_gpu-1.8.0-cp36-cp36m-linux_x86_64.whl
python3 -m pip install build/tensorflow-cudart-dynam/prebuilt/tensorflow-1.14.1-cp36-cp36m-linux_x86_64.whl
python3 -m pip install -r src/apps/covidct/requirements.txt
#
#bert
#
python3 -m pip install onnx
task mlperf:postprocess-onnx-bert-model
task apps:create-eval-features
#test, won't work natively on the 102 container, only on 101
task run-svgpu-manager-opt -- 1 --keepworkeralive yes --precreated_workers 1
task BUILD=release apps:run-bert-onnxruntime-ava -- --batchsize 16 --count 512
#
#covid
#
task covidct:download-dataset -f
task covidct:extract-npy
python3 -m pip install cupy-cuda102
task build-zoom-kernel
python3 -m pip uninstall cupy-cuda102
python3 -m pip install -r src/apps/covidct/requirements.txt
sudo cp src/apps/covidct/refactored/zoom_kernel.cubin /cuda_dumps/
#test
task run-svgpu-manager-opt-debug -- 1 --keepworkeralive yes --precreated_workers 1
task BUILD=release SPEC=onnx_opt covidct:run-covidct-in-mem-ava -- --batch-size 1 --count 2
#
#face det
#
#outside the container, set ~/.aws/credentials:
[default]
aws_access_key_id = YOUR_aws_access_key_id
aws_secret_access_key = YOUR_aws_secret_access_key
task apps:download-face-det-onnx-model
#back to container
task apps:postprocess-face-det-onnx-model
# get WIDER_val.zip somehow; the Drive download is not working, got it from zemaitis
cd src/apps/faas_face_det && make && cd -
#test
task run-svgpu-manager-opt -- 1 --keepworkeralive yes --precreated_workers 1
task BUILD=release SPEC=onnx_opt apps:face-det-test-ava -- --bsize 16 --count 256
#
#face id
#
python3 -m pip uninstall onnxruntime-gpu
python3 -m pip install onnxruntime-gpu==1.6
#edit src/apps/faas_face_id_client/detect.py to have only providers = ['CUDAExecutionProvider']
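# one way to do that, assuming providers is assigned on a single line (untested sketch):
sed -i "s/providers = .*/providers = ['CUDAExecutionProvider']/" src/apps/faas_face_id_client/detect.py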
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8_8.0.5.39-1+cuda10.2_amd64.deb
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/libcudnn8-dev_8.0.5.39-1+cuda10.2_amd64.deb
sudo dpkg -i libcudnn8_8.0.5.39-1+cuda10.2_amd64.deb libcudnn8-dev_8.0.5.39-1+cuda10.2_amd64.deb
task apps:extract-lfw-faces
#test
task run-svgpu-manager-opt -- 1 --keepworkeralive yes --precreated_workers 1
task BUILD=release SPEC=onnx_opt apps:face-id-test-ava -- --batchsize 16 --count 256
#
#resnet
#
task mlperf:postprocess-resnet50-onnx
sudo apt install npm
sudo npm install -g tget
python3 -m pip install ck
task mlperf:pull-ck-repo
task mlperf:download-imagenet
task mlperf:download-resnet50-onnx
#test
task run-svgpu-manager-opt -- 1 --keepworkeralive yes --precreated_workers 1
task BUILD=release SPEC=onnx_opt apps:run-resnet50-onnxruntime-ava -- --count 2048
#
#kmeans
#
sudo cp -r src/apps/kmeans/cuda_dumps/* /cuda_dumps
#test
task run-svgpu-manager-opt -- 1 --keepworkeralive yes --precreated_workers 1
task BUILD=release SPEC=onnx_opt apps:run-kmeans-ava
#To build TF dyn, had to add
import sys
sys.path.append("/root/.local/lib/python3.6/site-packages")
#to vendor/tensorflow-cudart-dynam/tensorflow/python/keras/preprocessing/__init__.py