#!/bin/bash
################################################################################
# BAGEL Model Training with FSDP2
################################################################################
#
# DESCRIPTION:
# Train BAGEL multimodal model with unified vision understanding and
# generation capabilities using FSDP2 distributed training.
#
# KEY FEATURES:
# - Vision understanding and generation
# - Qwen2-based LLM with MoT (Mixture-of-Transformer-Experts)
# - SigLIP vision encoder
# - VAE for image generation
# - Sequence packing support
# - FSDP2 distributed training
# - Optional Native Sparse Attention (NSA)
#
# REQUIREMENTS:
# - 8x GPUs (A100/H100 recommended, 80GB VRAM)
# - flash-attn: pip install flash-attn --no-build-isolation
# - Optional NSA: pip install git+https://github.com/XunhaoLai/native-sparse-attention-triton.git
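#
# Quick environment sanity check before launching (a suggested snippet, not
# part of this script; assumes the pip installs above have been run):
# ```bash
# python -c "import flash_attn; print(flash_attn.__version__)"
# nvidia-smi --query-gpu=name,memory.total --format=csv
# ```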
#
# DATASET:
# Prepare your dataset in BAGEL format (Parquet/Arrow/JSON):
# Example dataset: https://huggingface.co/datasets/kcz358/bagel-example
#
# Example dataset entry:
# ```json
# {
# "messages": [
# {
# "role": "user",
# "content": [
# {"type": "image_url", "image_url": {"url": "path/to/image.jpg"}},
# {"type": "text", "text": "Describe this image"}
# ]
# },
# {
# "role": "assistant",
# "content": [{"type": "text", "text": "This image shows..."}]
# }
# ]
# }
# ```
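#
# A minimal sketch for producing such a file with the standard library only
# (the entry mirrors the schema above; paths and filename are placeholders):
# ```python
# import json
#
# entry = {
#     "messages": [
#         {"role": "user", "content": [
#             {"type": "image_url", "image_url": {"url": "path/to/image.jpg"}},
#             {"type": "text", "text": "Describe this image"},
#         ]},
#         {"role": "assistant", "content": [
#             {"type": "text", "text": "This image shows..."},
#         ]},
#     ]
# }
# with open("train.json", "w") as f:
#     json.dump([entry], f)
# ```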
#
# MODEL CHECKPOINT:
# You can use either:
# 1. Original BAGEL weights (requires config.json conversion)
# See: https://huggingface.co/kcz358/bagel_hf/blob/main/config.json
# 2. Converted HF weights: lmms-lab/BAGEL-7B-MoT-ver.LE (recommended)
#
# CONFIGURATION:
# Edit example_config.yaml to customize:
# - Model checkpoint: load_from_pretrained_path
# - Dataset path: datasets[0].path
# - Batch size: per_device_train_batch_size
# - Packing: packing (true/false)
# - Visual understanding: extra_kwargs.visual_und
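#
# Illustrative fragment of example_config.yaml covering the keys listed above
# (values are placeholders, not recommendations; the real file defines more):
# ```yaml
# load_from_pretrained_path: lmms-lab/BAGEL-7B-MoT-ver.LE
# per_device_train_batch_size: 1
# packing: true
# packing_length: 4096
# datasets:
#   - path: path/to/your/dataset
# extra_kwargs:
#   visual_und: true
# ```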
#
# PERFORMANCE TIPS:
# - Enable packing for better GPU utilization (packing: true)
# - Use NSA for long sequences (enable monkey_patch_kwargs)
# - Adjust packing_length based on GPU memory (default: 4096)
# - Monitor memory with: watch -n 1 nvidia-smi
#
# ADVANCED FEATURES:
# - Native Sparse Attention: Uncomment monkey_patch_kwargs in config
# - Mixed Understanding/Generation: Set visual_und: true/false
#
################################################################################
# Number of GPUs
NGPUS=8
# Training command
torchrun --nproc_per_node=${NGPUS} \
--nnodes=1 \
--node_rank=0 \
--master_addr=127.0.0.1 \
--master_port=12357 \
-m lmms_engine.launch.cli \
config_yaml=examples/bagel/example_config.yaml
################################################################################
# MULTI-NODE TRAINING:
#
# On rank 0 node:
# torchrun --nproc_per_node=8 \
# --nnodes=2 \
# --node_rank=0 \
# --master_addr=<RANK_0_IP> \
# --master_port=12357 \
# -m lmms_engine.launch.cli \
# config_yaml=examples/bagel/example_config.yaml
#
# On rank 1 node:
# torchrun --nproc_per_node=8 \
# --nnodes=2 \
# --node_rank=1 \
# --master_addr=<RANK_0_IP> \
# --master_port=12357 \
# -m lmms_engine.launch.cli \
# config_yaml=examples/bagel/example_config.yaml
#
################################################################################
#
# TROUBLESHOOTING:
# - If config.json is not HF compatible, use the converted weights
# - For memory issues, reduce batch size or packing_length
# - Check docs/models/bagel.md for detailed configuration options
#
################################################################################