-
Notifications
You must be signed in to change notification settings - Fork 37
Expand file tree
/
Copy pathbuild.sh
More file actions
executable file
·368 lines (342 loc) · 10.1 KB
/
build.sh
File metadata and controls
executable file
·368 lines (342 loc) · 10.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
#!/bin/bash
set -e
# Environment variables used when building torch_musa
#
#   TORCH_MUSA_ARCH_LIST
#     specify which MUSA architectures to build for.
#     e.g. 'TORCH_MUSA_ARCH_LIST="21;22"'
#
#   PYTORCH_REPO_PATH
#     location of the PyTorch checkout; defaults to ../pytorch relative to
#     the directory containing this script.
#
#   KINETO_URL, USE_KINETO, USE_MCCL
#     optional overrides for the defaults assigned below.
#

# Absolute directory containing this script. `&&` makes a failed `cd` fail the
# assignment (and therefore, under `set -e`, the script) instead of silently
# recording the caller's working directory.
CUR_DIR=$(
  cd -- "$(dirname -- "$0")" && pwd
)
TORCH_MUSA_HOME=$CUR_DIR
PYTORCH_PATH=${PYTORCH_REPO_PATH:-$(realpath "${TORCH_MUSA_HOME}/../pytorch")}
TORCH_PATCHES_DIR=${TORCH_MUSA_HOME}/torch_patches/
KINETO_URL=${KINETO_URL:-https://sh-code.mthreads.com/ai/kineto.git}
KINETO_TAG=v2.7.0

# Build switches; the command-line options parsed further down may override them.
BUILD_WHEEL=0
DEBUG_MODE=0
ASAN_MODE=0
BUILD_TORCH=1
BUILD_TORCH_MUSA=1
USE_KINETO=${USE_KINETO:-1}
ONLY_PATCH=0
CLEAN=0
COMPILE_FP64=1

PYTORCH_TAG=v2.9.0
# The tag without its leading "v", e.g. v2.9.0 -> 2.9.0.
PYTORCH_BUILD_VERSION="${PYTORCH_TAG:1}"
PYTORCH_BUILD_NUMBER=0 # This is used for official torch distribution.
USE_MCCL=${USE_MCCL:-1}
# Print the colored command-line help text for this script to stdout.
usage() {
  local heading_open="\033[1;32m"
  local option_open="\033[32m"
  local color_reset="\033[0m"
  local text

  # Bold green headings.
  for text in \
    "This script is used to build PyTorch and Torch_MUSA. " \
    "Parameters usage: "; do
    echo -e "${heading_open}${text}${color_reset}"
  done

  # One plain green line per supported option.
  for text in \
    " --all : Means building both PyTorch and Torch_MUSA. " \
    " --fp64 : Means compiling fp64 data type in kernels using mcc in Torch_MUSA. " \
    " -m/--musa : Means building Torch_MUSA only. " \
    " -t/--torch : Means building original PyTorch only. " \
    " -d/--debug : Means building in debug mode. " \
    " -a/--asan : Means building in asan mode. " \
    " -c/--clean : Means cleaning everything that has been built. " \
    " -p/--patch : Means applying patches only. " \
    " -w/--wheel : Means generating wheel after building. " \
    " -n/--no_kineto : Disable kineto. " \
    " -h/--help : Help information. "; do
    echo -e "${option_open}${text}${color_reset}"
  done
}
# parse parameters
# getopt runs inside the `if` condition on purpose: with `set -e` active, a
# bare failing `parameters=$(getopt ...)` would abort the script before the
# hint below could be printed.
if ! parameters=$(getopt -o +mtdacpwnh --long all,fp64,musa,torch,debug,asan,clean,wheel,no_kineto,patch,help -n "$0" -- "$@"); then
  echo -e "\033[34mTry '$0 --help' for more information. \033[0m"
  exit 1
fi
# Replace the positional parameters with getopt's normalized, quoted output.
eval set -- "$parameters"
while true; do
  case "$1" in
  --all)
    BUILD_TORCH=1
    BUILD_TORCH_MUSA=1
    shift
    ;;
  --fp64)
    COMPILE_FP64=1
    shift
    ;;
  -m | --musa)
    BUILD_TORCH_MUSA=1
    BUILD_TORCH=0
    shift
    ;;
  -t | --torch)
    BUILD_TORCH_MUSA=0
    BUILD_TORCH=1
    shift
    ;;
  -d | --debug)
    DEBUG_MODE=1
    shift
    ;;
  -a | --asan)
    ASAN_MODE=1
    shift
    ;;
  -c | --clean)
    CLEAN=1
    shift
    ;;
  -w | --wheel)
    BUILD_WHEEL=1
    shift
    ;;
  -n | --no_kineto)
    USE_KINETO=0
    shift
    ;;
  -p | --patch)
    ONLY_PATCH=1
    shift
    ;;
  -h | --help)
    usage
    exit
    ;;
  --)
    # End of options.
    shift
    break
    ;;
  *)
    usage
    exit 1
    ;;
  esac
done
# Report whether a command can be found by the shell; when it cannot, install
# the packages listed in ${TORCH_MUSA_HOME}/requirements.txt with pip.
# Globals:   TORCH_MUSA_HOME (read)
# Arguments: $1 - name of the command to look up
# Outputs:   one status line on stdout (followed by pip's output on install)
cmd_check() {
  # Local so the lookup name does not leak into, or clobber, a global `cmd`.
  local cmd="$1"
  if command -v "${cmd}" >/dev/null 2>&1; then
    echo "- cmd exist : ${cmd}"
  else
    echo -e "\033[34m- cmd does not exist, automatically install \"${cmd}\"\033[0m"
    pip install -r "${TORCH_MUSA_HOME}/requirements.txt" # extra requirements
  fi
}
# Run cmd_check for "pre-commit", then run `pre-commit install` from the
# directory containing this script unless .git/hooks/pre-commit already
# exists there.
precommit_install() {
  cmd_check "pre-commit"
  # Declared separately from the assignment so a failing realpath/dirname is
  # not masked by `local`'s own exit status.
  local root_dir
  root_dir="$(dirname "$(realpath "${BASH_SOURCE[0]:-$0}")")"
  if [ ! -f "${root_dir}/.git/hooks/pre-commit" ]; then
    pushd "${root_dir}"
    pre-commit install
    popd
  fi
}
# Runs on every invocation, before main is called further down.
precommit_install
# Make sure a PyTorch checkout exists at ${PYTORCH_PATH} and fetch its
# submodules.
#   - If the directory exists, check out ${PYTORCH_TAG} in it.
#   - Otherwise shallow-clone ${PYTORCH_TAG} straight into ${PYTORCH_PATH}, so
#     a custom PYTORCH_REPO_PATH is honoured rather than always cloning into
#     ${TORCH_MUSA_HOME}/../pytorch.
# Globals: PYTORCH_PATH, PYTORCH_TAG (read)
clone_pytorch() {
  # if PyTorch repo exists already, we skip git cloning PyTorch
  if [ -d "${PYTORCH_PATH}" ]; then
    echo -e "\033[34mPyTorch repo path is ${PYTORCH_PATH} ...\033[0m"
    pushd "${PYTORCH_PATH}"
    git checkout "${PYTORCH_TAG}"
    echo -e "\033[34m Switch the Pytorch repo to tag ${PYTORCH_TAG} \033[0m"
    popd
  else
    echo -e "\033[34mUsing default pytorch repo path: ${PYTORCH_PATH}\033[0m"
    echo -e "\033[34mPyTorch repo does not exist, now git clone PyTorch to ${PYTORCH_PATH} ...\033[0m"
    git clone -b "${PYTORCH_TAG}" --depth=1 https://github.com/pytorch/pytorch.git "${PYTORCH_PATH}"
  fi
  # to make sure submodules are fetched
  pushd "${PYTORCH_PATH}"
  update_submodule
  # Balance the pushd above so the caller's working directory is restored.
  popd
}
# Apply, inside ${PYTORCH_PATH}, every regular file ending in ".patch" that
# `find` reports for the given arguments.
# Arguments: "$@" - start directory plus any extra find predicates
_apply_patches_found() {
  local patch_file
  # NUL-delimited read keeps paths with whitespace intact; fd 3 leaves the
  # loop body's stdin untouched.
  while IFS= read -r -d '' patch_file <&3; do
    echo -e "\033[34mapplying patch: $patch_file \033[0m"
    pushd "${PYTORCH_PATH}"
    git apply --check "$patch_file"
    git apply "$patch_file"
    popd
  done 3< <(find "$@" -type f -name "*.patch" -print0)
}

# Reset the PyTorch checkout (when it is a git repo) and apply the patches
# found under ${TORCH_PATCHES_DIR}; the kineto/ subdirectory is applied only
# when USE_KINETO is 1.
# Globals: PYTORCH_PATH, PYTORCH_TAG, TORCH_PATCHES_DIR, USE_KINETO (read)
apply_torch_patches() {
  # apply patches into PyTorch
  echo -e "\033[34mApplying patches to ${PYTORCH_PATH} ...\033[0m"
  # clean PyTorch before patching
  if [ -d "${PYTORCH_PATH}/.git" ]; then
    echo -e "\033[34mStash and checkout the PyTorch environment before patching. \033[0m"
    pushd "${PYTORCH_PATH}"
    git stash -u
    git checkout "${PYTORCH_TAG}"
    popd
  fi
  _apply_patches_found "${TORCH_PATCHES_DIR}" -not -path "*/kineto/*"
  if [ "${USE_KINETO}" -eq 1 ]; then
    _apply_patches_found "${TORCH_PATCHES_DIR}/kineto"
  fi
}
# Replace PyTorch's third_party/kineto with the copy kept under
# ${TORCH_MUSA_HOME}/third_party/kineto.
# Globals: PYTORCH_PATH, TORCH_MUSA_HOME (read)
update_kineto_source() {
  echo -e "\033[34mRemove Kineto in pytorch...\033[0m"
  pushd "${PYTORCH_PATH}"
  # remove the current kineto
  # `:?` aborts instead of deleting /third_party/kineto if PYTORCH_PATH is empty.
  rm -rf -- "${PYTORCH_PATH:?}/third_party/kineto"
  git submodule update --init --recursive --depth 1
  # remove the official kineto
  rm -rf -- "${PYTORCH_PATH:?}/third_party/kineto"
  popd
  echo -e "\033[34mMoving Updating kineto and moving to pytorch...\033[0m"
  pushd "${TORCH_MUSA_HOME}"
  git submodule update --init --depth 1
  if [ -d /home/kineto ]; then
    # Reuse the third_party tree from /home/kineto when that directory exists.
    cp -r /home/kineto/libkineto/third_party ./third_party/kineto/libkineto/
  else
    git submodule update --init --recursive --depth 1
  fi
  # Point the copied tree's .git file at torch_musa's module directory.
  echo "gitdir: $TORCH_MUSA_HOME/.git/modules/third_party/kineto" > ./third_party/kineto/.git
  cp -r third_party/kineto "${PYTORCH_PATH}/third_party/"
  # Balance the pushd above so the caller's working directory is restored.
  popd
}
# Fetch PyTorch's submodules. With USE_KINETO=1 this is delegated to
# update_kineto_source; otherwise a plain recursive, depth-1 submodule update
# is run inside ${PYTORCH_PATH}.
# Globals: USE_KINETO, PYTORCH_PATH (read)
update_submodule() {
  if [ "${USE_KINETO}" -eq 1 ]; then
    update_kineto_source
  else
    pushd "${PYTORCH_PATH}"
    git submodule update --init --recursive --depth 1
    popd
  fi
}
# Build PyTorch from ${PYTORCH_PATH}: as a wheel that is then installed when
# BUILD_WHEEL is 1, otherwise as an editable install.
# Globals: PYTORCH_PATH, TORCH_MUSA_HOME, BUILD_WHEEL, PYTORCH_BUILD_NUMBER,
#          PYTORCH_BUILD_VERSION, DEBUG_MODE, ASAN_MODE, USE_KINETO (read)
# Returns: the exit status of the build command; exits 1 when ${PYTORCH_PATH}
#          is not a directory.
build_pytorch() {
  echo -e "\033[34mBuilding PyTorch...\033[0m"
  local status=0
  if [ ! -d "${PYTORCH_PATH}" ]; then
    echo -e "\033[34mAn error occurred while building PyTorch, the specified PyTorch repo [${PYTORCH_PATH}] does not exist \033[0m"
    exit 1
  fi
  pushd "${PYTORCH_PATH}"
  pip install -r requirements.txt
  pip install -r "${TORCH_MUSA_HOME}/requirements.txt" # extra requirements
  if [ "${BUILD_WHEEL}" -eq 1 ]; then
    rm -rf dist
    pip uninstall torch -y
    # `|| status=$?` records a failed build; a bare `status=$?` on the next
    # line is never reached while `set -e` is active.
    PYTORCH_BUILD_NUMBER="${PYTORCH_BUILD_NUMBER}" \
      PYTORCH_BUILD_VERSION="${PYTORCH_BUILD_VERSION}" \
      DEBUG="${DEBUG_MODE}" \
      USE_ASAN="${ASAN_MODE}" \
      USE_KINETO="${USE_KINETO}" \
      USE_NCCL=0 \
      BUILD_TEST=0 python -m pip wheel . --wheel-dir ./dist --no-build-isolation ||
      status=$?
    rm -rf torch.egg-info
    # Only install the wheel when one was actually produced.
    if [ "${status}" -eq 0 ]; then
      pip install dist/*.whl
    fi
  else
    PYTORCH_BUILD_NUMBER="${PYTORCH_BUILD_NUMBER}" \
      PYTORCH_BUILD_VERSION="${PYTORCH_BUILD_VERSION}" \
      DEBUG="${DEBUG_MODE}" \
      USE_ASAN="${ASAN_MODE}" \
      USE_KINETO="${USE_KINETO}" \
      USE_NCCL=0 \
      BUILD_TEST=0 python -m pip install --no-build-isolation -v -e . ||
      status=$?
  fi
  popd
  return "${status}"
}
# Run `python setup.py clean` inside the PyTorch checkout.
# Globals: PYTORCH_PATH (read)
clean_pytorch() {
  echo -e "\033[34mCleaning PyTorch...\033[0m"
  pushd "${PYTORCH_PATH}"
  python setup.py clean
  popd
}
# Run `python setup.py clean` inside the torch_musa tree and delete
# ${CUR_DIR}/build.
# Globals: TORCH_MUSA_HOME, CUR_DIR (read)
clean_torch_musa() {
  echo -e "\033[34mCleaning torch_musa...\033[0m"
  pushd "${TORCH_MUSA_HOME}"
  TORCH_DEVICE_BACKEND_AUTOLOAD=0 python setup.py clean
  # `:?` aborts instead of deleting /build if CUR_DIR is empty or unset.
  rm -rf -- "${CUR_DIR:?}/build"
  popd
}
# Build torch_musa from ${TORCH_MUSA_HOME}: as a wheel that is then installed
# when BUILD_WHEEL is 1, otherwise as an editable install. After a successful
# build, scripts/scan_ops.sh is run.
# Globals: TORCH_MUSA_HOME, CUR_DIR, PYTORCH_PATH, BUILD_WHEEL, DEBUG_MODE,
#          ASAN_MODE, COMPILE_FP64, USE_MCCL, USE_KINETO (read)
# Returns: 0 on success; exits the current shell with the build command's
#          status when the build fails.
build_torch_musa() {
  echo -e "\033[34mBuilding torch_musa...\033[0m"
  local status=0
  pushd "${TORCH_MUSA_HOME}"
  pip install -r requirements.txt
  if [ "${BUILD_WHEEL}" -eq 1 ]; then
    rm -rf dist
    # `|| status=$?` records a failed build; a bare `status=$?` on the next
    # line is never reached while `set -e` is active.
    TORCH_DEVICE_BACKEND_AUTOLOAD=0 \
      PYTORCH_REPO_PATH="${PYTORCH_PATH}" \
      DEBUG="${DEBUG_MODE}" \
      USE_ASAN="${ASAN_MODE}" \
      ENABLE_COMPILE_FP64="${COMPILE_FP64}" \
      USE_MCCL="${USE_MCCL}" \
      USE_KINETO="${USE_KINETO}" python -m pip wheel . --wheel-dir ./dist --no-build-isolation ||
      status=$?
    rm -rf torch_musa.egg-info
    # Only install the wheel when one was actually produced.
    if [ "${status}" -eq 0 ]; then
      pip install dist/*.whl
    fi
  else
    TORCH_DEVICE_BACKEND_AUTOLOAD=0 \
      PYTORCH_REPO_PATH="${PYTORCH_PATH}" \
      DEBUG="${DEBUG_MODE}" \
      USE_ASAN="${ASAN_MODE}" \
      ENABLE_COMPILE_FP64="${COMPILE_FP64}" \
      USE_MCCL="${USE_MCCL}" \
      USE_KINETO="${USE_KINETO}" python -m pip install --no-build-isolation -v -e . ||
      status=$?
  fi
  if [ "${status}" -ne 0 ]; then
    exit "${status}"
  fi
  # scan and output ops list for each building
  bash "${CUR_DIR}/scripts/scan_ops.sh"
  popd
  return "${status}"
}
# Drive the steps selected by the option flags: clean, patch-only, PyTorch
# build and torch_musa build. Exits 1 with troubleshooting hints when a build
# step fails.
# Globals: CLEAN, ONLY_PATCH, BUILD_TORCH, BUILD_TORCH_MUSA (read)
main() {
  local build_pytorch_status=0
  local build_torch_musa_status=0
  local clean_and_build

  # NOTE(review): BUILD_TORCH and BUILD_TORCH_MUSA both default to 1 and no
  # option visible in this file sets both to 0, so this clean-only branch
  # looks unreachable - confirm the intended meaning of a bare `-c`.
  if [[ ${CLEAN} -eq 1 ]] && [[ ${BUILD_TORCH} -ne 1 ]] && [[ ${BUILD_TORCH_MUSA} -ne 1 ]]; then
    clean_pytorch
    clean_torch_musa
    exit 0
  fi
  if [ "${ONLY_PATCH}" -eq 1 ]; then
    apply_torch_patches
    exit 0
  fi
  if [ "${BUILD_TORCH}" -eq 1 ]; then
    clone_pytorch
    if [ "${CLEAN}" -eq 1 ]; then
      clean_pytorch
    fi
    apply_torch_patches
    # The script runs under `set -e`, so a plain `build_pytorch` call would
    # abort here on failure and the hints below would never print. Using
    # `build_pytorch || ...` would instead switch errexit off inside the
    # function. Running it in a subshell that re-enables errexit, with errexit
    # paused in this shell, keeps the function fail-fast and still lets us
    # read its status.
    set +e
    (
      set -e
      build_pytorch
    )
    build_pytorch_status=$?
    set -e
    if [ "${build_pytorch_status}" -ne 0 ]; then
      clean_and_build="bash build.sh -c # Clean PyTorch/torch_musa and build"
      echo -e "\033[31mBuilding PyTorch failed, please try cleaning first before building: \033[0m"
      echo -e "\033[32m$clean_and_build \033[0m"
      exit 1
    fi
  fi
  if [ "${BUILD_TORCH_MUSA}" -eq 1 ]; then
    if [ "${CLEAN}" -eq 1 ]; then
      clean_torch_musa
    fi
    # Same subshell pattern as above; it also contains any `exit` issued by
    # build_torch_musa so the hints can be shown.
    set +e
    (
      set -e
      build_torch_musa
    )
    build_torch_musa_status=$?
    set -e
    if [ "${build_torch_musa_status}" -ne 0 ]; then
      echo -e "\033[31mPlease try the following commands once building torch_musa is failed: \033[0m"
      echo -e "\033[32mClean PyTorch/torch_musa and build: \033[0m"
      echo "cmd1: bash build.sh -c"
      echo -e "\033[32mIf cmd1 still failed, update torch_musa to newest and build: \033[0m"
      echo "cmd2: git fetch && git rebase origin/main && bash build.sh -c"
      echo -e "\033[32mIf cmd2 still failed, update libraries and build: \033[0m"
      echo "cmd3: bash docker/common/daily/update_daily_musart.sh && bash docker/common/daily/update_daily_mudnn.sh && bash build.sh -c"
      echo -e "\033[32mIf cmd3 still failed, please check driver version on your host machine. \033[0m"
      exit 1
    fi
  fi
}
# Script entry point; main takes its configuration from the global flags set above.
main