Skip to content

[VLM] How to reproduce the results in examples/geo3k_vlm/README.md? #1160

@lostkevin

Description

@lostkevin

Hi! We tried to reproduce the eval score of ~0.6 at around step 400, but only got ~0.55 on Qwen3-VL-2B-Instruct. Could you please share the detailed configuration needed to reproduce the Megatron curve shown in README.md?

Here is our config:

  • GPU: 8xH20
  • Image: slimerl/slime:nightly-dev-20251216c
  • commit: 34bb0cd

We modify the script to load the local checkpoint:

#!/bin/bash

# Qwen3 VL RL training on geo3k dataset

# Supports both megatron and fsdp training backends

# Usage: 

#   SLIME_SCRIPT_TRAIN_BACKEND=fsdp ./run_geo3k_vlm.sh

#   SLIME_SCRIPT_MODEL_NAME=Qwen3-VL-2B-Instruct ./run_geo3k_vlm.sh

# Drop any NCCL debug logging the caller's environment may have enabled.
unset NCCL_DEBUG

# Show every replica's log lines; Ray deduplicates repeated logs by default.
export RAY_DEDUP_LOGS=0

# ---- Configuration (override via SLIME_SCRIPT_* environment variables) ----
TRAIN_BACKEND="${SLIME_SCRIPT_TRAIN_BACKEND:-megatron}"
MODEL_NAME="${SLIME_SCRIPT_MODEL_NAME:-Qwen3-VL-2B-Instruct}"
DATASET_NAME="${SLIME_SCRIPT_DATASET_NAME:-chenhegu/geo3k_imgurl}"
NUM_GPUS="${SLIME_SCRIPT_NUM_GPUS:-8}"

# Last path component of the dataset id, e.g. "chenhegu/geo3k_imgurl"
# becomes "geo3k_imgurl"; used to locate the local parquet files.
DATASET_LOCAL_NAME=$(basename "$DATASET_NAME")

# Validate MODEL_NAME

# Repo root: two directories up from this script's own location.
SLIME_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." &>/dev/null && pwd)"

# Model-specific settings; presumably defines MODEL_ARGS, which is expanded
# at job submission below — verify against scripts/models/qwen3-vl-2b.sh.
source "${SLIME_DIR}/scripts/models/qwen3-vl-2b.sh"

# External Ray flag: unset, empty, or "0" means this script manages its own
# local Ray cluster; any other value means an external cluster is in charge.
case "${SLIME_SCRIPT_EXTERNAL_RAY:-0}" in
   0) USE_EXTERNAL_RAY=0 ;;
   *) USE_EXTERNAL_RAY=1 ;;
esac

# Cleanup

# Kill leftover inference processes from a previous run.
# NOTE(review): goes straight to SIGKILL; a TERM-first shutdown would be
# gentler, but forced cleanup before a fresh run looks intentional here.
pkill -9 sglang

sleep 3

if [ "$USE_EXTERNAL_RAY" = "0" ]; then

   ray stop --force

   pkill -9 ray

fi

pkill -9 slime

sleep 3

# Second sweep — repeated on purpose (belt and braces) to catch processes
# that respawned or were still exiting during the first pass.
if [ "$USE_EXTERNAL_RAY" = "0" ]; then

   pkill -9 ray

fi

pkill -9 slime

pkill -9 redis

# Trace every command and abort on the first failure from here on.
set -ex

# Force unbuffered Python stdout/stderr so driver logs stream in real time.
# Fix: the original exported PYTHONBUFFERED=16, a variable CPython never
# reads — the env var Python actually honors is PYTHONUNBUFFERED.
export PYTHONUNBUFFERED=1

# Detect NVLink: count "NV<n>" link entries in the GPU topology matrix.
# Zero matches (or no nvidia-smi at all — stderr is discarded) means no
# NVLink; the flag feeds NCCL_NVLS_ENABLE in the Ray runtime env below.
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
HAS_NVLINK=0
if [ "$NVLINK_COUNT" -gt 0 ]; then
   HAS_NVLINK=1
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

# Download model and dataset

# Common args

# Checkpoint locations. NOTE(review): "/path/to/..." are placeholders the
# reporter substituted for local paths — edit before running.
CKPT_ARGS=(

   --hf-checkpoint /path/to/${MODEL_NAME}

   --ref-load /path/to/${MODEL_NAME}

)

ROLLOUT_ARGS=(

   # Training prompts: parquet with the prompt under "problem" and the
   # ground-truth label under "answer".
   --prompt-data /path/to/datasets/${DATASET_LOCAL_NAME}/train.parquet

   --input-key problem

   --label-key answer

   --apply-chat-template

   --rollout-shuffle

   # "math"-type reward — presumably rule-based answer matching; confirm
   # against the reward-model implementation.
   --rm-type math

   --num-rollout 3000

   # 64 prompts x 8 samples each = 512 trajectories per rollout step,
   # matching --global-batch-size below.
   --rollout-batch-size 64

   --n-samples-per-prompt 8

   --rollout-max-response-len 4096

   --rollout-temperature 0.8

   --global-batch-size 512

)

# required for vlm datasets

# Maps the dataset's "images" column onto the "image" multimodal slot.
MULTIMODAL_KEYS='{"image": "images"}'

EVAL_ARGS=(

   # Evaluate every 5 rollout steps.
   --eval-interval 5

   --eval-prompt-data geo3k /path/to/${DATASET_LOCAL_NAME}/test.parquet

   # One sample per eval prompt.
   --n-samples-per-eval-prompt 1

   --eval-max-response-len 4096

)

GRPO_ARGS=(

   --advantage-estimator grpo

   # NOTE(review): KL loss is switched on but both coefficients below are
   # 0.00, so the KL terms contribute nothing — confirm this is intended.
   --use-kl-loss

   --kl-loss-coef 0.00

   --kl-loss-type low_var_kl

   --kl-coef 0.00

   --entropy-coef 0.00

   # Asymmetric clipping: the upper bound (0.28) is looser than the lower.
   --eps-clip 0.2

   --eps-clip-high 0.28

)

OPTIMIZER_ARGS=(

   --optimizer adam

   --lr 1e-6

   # Constant LR, no decay schedule.
   --lr-decay-style constant

   --weight-decay 0.1

   --adam-beta1 0.9

   --adam-beta2 0.98

)

SGLANG_ARGS=(
   # One GPU per rollout inference engine.
   --rollout-num-gpus-per-engine 1
   # Fraction of GPU memory sglang reserves statically; 0.6 leaves headroom
   # for the colocated training process (see MISC_ARGS).
   --sglang-mem-fraction-static 0.6
   # Batch sizes to capture CUDA graphs for during rollout.
   --sglang-cuda-graph-bs 1 2 4 8 16 24 32 40 48 56 64 72 80 88 96 104 112 120 128 136 144 152 160 168 176 184 192 200 208 216 224 232 240 248 256
)

# Wandb args (only if WANDB_API_KEY is set)
# Fix: the original enabled wandb unconditionally with a hard-coded
# "--wandb-key apikey" placeholder, contradicting the comment above and
# baking a secret slot into the script. Gate on WANDB_API_KEY and read the
# key from the environment instead.
WANDB_ARGS=()
if [ -n "${WANDB_API_KEY:-}" ]; then
   WANDB_ARGS=(
      --use-wandb
      --wandb-project slime_qwen3_vl
      --wandb-group qwen3_vl_2b_slime_ref_3kiter
      --wandb-host https://api.bandw.top
      --wandb-key "${WANDB_API_KEY}"
   )
fi

MISC_ARGS=(
   # Run training and rollout engines on the same GPUs.
   --colocate
)

# Backend-specific args

if [ "$TRAIN_BACKEND" = "fsdp" ]; then

    BACKEND_ARGS=(

      --train-backend fsdp

      --gradient-checkpointing

      # Use FlashAttention-3 for both inference and training.
      --sglang-attention-backend fa3

      --attn-implementation flash_attention_3

      # 512 MiB staging buffer for weight sync to the rollout engines.
      --update-weight-buffer-size 536870912

    )

else

    # megatron backend (default)

    BACKEND_ARGS=(

      --train-backend megatron

      # NOTE(review): placeholder path — point at the Megatron-format
      # checkpoint before running.
      --load /path/to/${MODEL_NAME}

      # All parallelism degrees are 1: pure data parallelism across GPUs.
      --tensor-model-parallel-size 1

      --sequence-parallel

      --pipeline-model-parallel-size 1

      --context-parallel-size 1

      --expert-model-parallel-size 1

      --expert-tensor-parallel-size 1

      # Full activation recomputation, one layer per recompute chunk.
      --recompute-granularity full

      --recompute-method uniform

      --recompute-num-layers 1

      # Pack variable-length samples up to 4096 tokens per GPU micro-batch.
      --use-dynamic-batch-size

      --max-tokens-per-gpu 4096

      --attention-dropout 0.0

      --hidden-dropout 0.0

      --accumulate-allreduce-grads-in-fp32

      --attention-softmax-in-fp32

      --attention-backend flash

      # Convert Megatron weights to HF format via the bridge path when
      # syncing to the rollout engines.
      --megatron-to-hf-mode bridge

    )

fi

# Start a local Ray head node unless an external cluster is managed elsewhere.
if [ "$USE_EXTERNAL_RAY" = "0" ]; then

   export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

   # Keep local control-plane traffic off any configured HTTP proxy.
   export no_proxy="127.0.0.1,${MASTER_ADDR}"

   # Fix: quote ${MASTER_ADDR} and ${NUM_GPUS} — unquoted expansions are
   # word-split by the shell (ShellCheck SC2086).
   ray start --head --node-ip-address "${MASTER_ADDR}" --num-gpus "${NUM_GPUS}" --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265

fi

# Environment variables propagated to every Ray worker process:
# Megatron on PYTHONPATH, serialized CUDA kernel launches (required by
# Megatron), and NVLS collectives only when NVLink was detected above.
RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

# Submit the training job to the local Ray dashboard address.
# Fix: every array expansion is quoted — bare ${arr[@]} (ShellCheck SC2068)
# word-splits and glob-expands each element, which breaks any argument
# containing spaces (e.g. checkpoint paths); "${arr[@]}" preserves each
# element as exactly one argument, and an empty array expands to nothing.
ray job submit --address="http://127.0.0.1:8265" \
   --runtime-env-json="${RUNTIME_ENV_JSON}" \
   -- python3 train.py \
   --actor-num-nodes 1 \
   --actor-num-gpus-per-node "${NUM_GPUS}" \
   --multimodal-keys "${MULTIMODAL_KEYS}" \
   "${MODEL_ARGS[@]}" \
   "${CKPT_ARGS[@]}" \
   "${ROLLOUT_ARGS[@]}" \
   "${EVAL_ARGS[@]}" \
   "${GRPO_ARGS[@]}" \
   "${OPTIMIZER_ARGS[@]}" \
   "${SGLANG_ARGS[@]}" \
   "${WANDB_ARGS[@]}" \
   "${BACKEND_ARGS[@]}" \
   "${MISC_ARGS[@]}"

The eval curve:
Image

And the loss curve:
Image

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions