#!/usr/bin/env bash
# Fine-tune a LLaMA model with DeepSpeed under a parallelism mode chosen
# via $1 (see the case statement below for the accepted modes).
set -euo pipefail

weight_path=/host/ssd/hf_models/llama2-7b-hf
# weight_path=/host/ssd/hf_models/Meta-Llama-3.1-8B
export WANDB_MODE=disabled   # disable Weights & Biases logging for this run
num_gpus=8
epoch=3
mbs=2                        # micro-batch size per model replica
# Parallelism mode:
#   zero1 / zero2 / zero3 — pure ZeRO data parallelism
#   zero1tp / zero2tp     — ZeRO + 4-way automatic tensor parallelism
#   tp                    — 8-way automatic tensor parallelism only
MODE=${1:-zero1tp}
case "$MODE" in
  zero1tp)
    ZERO_STAGE=1
    AUTOTP_SIZE=4
    ;;
  zero2tp)
    ZERO_STAGE=2
    AUTOTP_SIZE=4
    ;;
  zero1)
    ZERO_STAGE=1
    AUTOTP_SIZE=0
    ;;
  zero2)
    ZERO_STAGE=2
    AUTOTP_SIZE=0
    ;;
  zero3)
    ZERO_STAGE=3
    AUTOTP_SIZE=0
    ;;
  tp)
    ZERO_STAGE=0
    AUTOTP_SIZE=8
    ;;
  *)
    echo "error: unknown mode '$MODE'; use one of: zero1, zero2, zero3, zero1tp, zero2tp, tp" >&2
    exit 1
    ;;
esac

# With tensor parallelism each data-parallel rank spans AUTOTP_SIZE GPUs,
# so scale the per-device batch to keep the effective global batch the same.
if (( AUTOTP_SIZE > 0 )); then
  per_device_train_batch_size=$(( mbs * AUTOTP_SIZE ))
else
  per_device_train_batch_size=$mbs
fi
#######################################
# Render the DeepSpeed JSON config from the template.
# Globals:   ZERO_STAGE, AUTOTP_SIZE (read)
# Arguments: $1 - template path, $2 - output path
# Outputs:   writes the rendered config to $2
#######################################
render_ds_config() {
  local template=$1
  local output=$2
  # Substitute the literal ${zero_stage} / ${autotp_size} placeholders;
  # \$ keeps sed from treating them as shell expansions in this string.
  sed -e "s/\${zero_stage}/${ZERO_STAGE}/g" \
      -e "s/\${autotp_size}/${AUTOTP_SIZE}/g" \
      "$template" > "$output"
}

TEMPLATE_FILE="configs/ds_config_temp.json"
OUTPUT_FILE="configs/ds_config.json"
render_ds_config "$TEMPLATE_FILE" "$OUTPUT_FILE"
# Launch training. per_device_train_batch_size was scaled above when
# tensor parallelism is active. NOTE: the original script passed
# --learning_rate twice (0, then 2e-5); argparse keeps the last value,
# so only the effective 2e-5 is retained here.
deepspeed --num_gpus "$num_gpus" \
  --master_port 51336 train.py \
  --model_name_or_path "$weight_path" \
  --data_path ./alpaca_data.json \
  --bf16 True \
  --output_dir "out_load_test/$MODE" \
  --num_train_epochs "$epoch" \
  --gradient_checkpointing false \
  --per_device_train_batch_size "$per_device_train_batch_size" \
  --per_device_eval_batch_size 1 \
  --evaluation_strategy no \
  --save_strategy steps \
  --save_steps 10000 \
  --gradient_accumulation_steps 4 \
  --learning_rate 2e-5 \
  --weight_decay 0. \
  --warmup_ratio 0.03 \
  --lr_scheduler_type cosine \
  --logging_steps 1 \
  --tf32 True \
  --deepspeed "./configs/ds_config.json"