Hi! We tried to reproduce the eval score of ~0.6 at around step 400 from the Megatron curve in README.md, but only reach ~0.55 on Qwen3-VL-2B-Instruct. Could you please share the detailed configuration used to produce that curve?

Here is our setup:
- GPU: 8xH20
- Image: slimerl/slime:nightly-dev-20251216c
- commit: 34bb0cd
We modified the script to load the local checkpoint:
```bash
#!/bin/bash
# Qwen3 VL RL training on geo3k dataset
# Supports both megatron and fsdp training backends
# Usage:
#   SLIME_SCRIPT_TRAIN_BACKEND=fsdp ./run_geo3k_vlm.sh
#   SLIME_SCRIPT_MODEL_NAME=Qwen3-VL-2B-Instruct ./run_geo3k_vlm.sh

unset NCCL_DEBUG
export RAY_DEDUP_LOGS=0

# Configuration
TRAIN_BACKEND=${SLIME_SCRIPT_TRAIN_BACKEND:-"megatron"}
MODEL_NAME=${SLIME_SCRIPT_MODEL_NAME:-"Qwen3-VL-2B-Instruct"}
DATASET_NAME=${SLIME_SCRIPT_DATASET_NAME:-"chenhegu/geo3k_imgurl"}
NUM_GPUS=${SLIME_SCRIPT_NUM_GPUS:-8}
DATASET_LOCAL_NAME=$(basename "$DATASET_NAME")

# Resolve repo root and load model-specific args
SLIME_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." &>/dev/null && pwd)"
source "${SLIME_DIR}/scripts/models/qwen3-vl-2b.sh"

# External Ray flag
if [ -z "$SLIME_SCRIPT_EXTERNAL_RAY" ] || [ "$SLIME_SCRIPT_EXTERNAL_RAY" = "0" ]; then
    USE_EXTERNAL_RAY=0
else
    USE_EXTERNAL_RAY=1
fi

# Cleanup
pkill -9 sglang
sleep 3
if [ "$USE_EXTERNAL_RAY" = "0" ]; then
    ray stop --force
    pkill -9 ray
fi
pkill -9 slime
sleep 3
if [ "$USE_EXTERNAL_RAY" = "0" ]; then
    pkill -9 ray
fi
pkill -9 slime
pkill -9 redis

set -ex
export PYTHONUNBUFFERED=1

# Detect NVLink
NVLINK_COUNT=$(nvidia-smi topo -m 2>/dev/null | grep -o 'NV[0-9][0-9]*' | wc -l)
if [ "$NVLINK_COUNT" -gt 0 ]; then
    HAS_NVLINK=1
else
    HAS_NVLINK=0
fi
echo "HAS_NVLINK: $HAS_NVLINK (detected $NVLINK_COUNT NVLink references)"

# Model and dataset are loaded from local paths (download step removed)

# Common args
CKPT_ARGS=(
    --hf-checkpoint /path/to/${MODEL_NAME}
    --ref-load /path/to/${MODEL_NAME}
)

ROLLOUT_ARGS=(
    --prompt-data /path/to/datasets/${DATASET_LOCAL_NAME}/train.parquet
    --input-key problem
    --label-key answer
    --apply-chat-template
    --rollout-shuffle
    --rm-type math
    --num-rollout 3000
    --rollout-batch-size 64
    --n-samples-per-prompt 8
    --rollout-max-response-len 4096
    --rollout-temperature 0.8
    --global-batch-size 512
)

# required for vlm datasets
MULTIMODAL_KEYS='{"image": "images"}'

EVAL_ARGS=(
    --eval-interval 5
    --eval-prompt-data geo3k /path/to/${DATASET_LOCAL_NAME}/test.parquet
    --n-samples-per-eval-prompt 1
    --eval-max-response-len 4096
)

GRPO_ARGS=(
    --advantage-estimator grpo
    --use-kl-loss
    --kl-loss-coef 0.00
    --kl-loss-type low_var_kl
    --kl-coef 0.00
    --entropy-coef 0.00
    --eps-clip 0.2
    --eps-clip-high 0.28
)

OPTIMIZER_ARGS=(
    --optimizer adam
    --lr 1e-6
    --lr-decay-style constant
    --weight-decay 0.1
    --adam-beta1 0.9
    --adam-beta2 0.98
)

SGLANG_ARGS=(
    --rollout-num-gpus-per-engine 1
    --sglang-mem-fraction-static 0.6
    --sglang-cuda-graph-bs 1 2 4 8 16 24 32 40 48 56 64 72 80 88 96 104 112 120 128 136 144 152 160 168 176 184 192 200 208 216 224 232 240 248 256
)

# Wandb args (only if WANDB_API_KEY is set)
WANDB_ARGS=(
    --use-wandb
    --wandb-project slime_qwen3_vl
    --wandb-group qwen3_vl_2b_slime_ref_3kiter
    --wandb-host https://api.bandw.top
    --wandb-key apikey
)

MISC_ARGS=(
    --colocate
)

# Backend-specific args
if [ "$TRAIN_BACKEND" = "fsdp" ]; then
    BACKEND_ARGS=(
        --train-backend fsdp
        --gradient-checkpointing
        --sglang-attention-backend fa3
        --attn-implementation flash_attention_3
        --update-weight-buffer-size 536870912
    )
else
    # megatron backend (default)
    BACKEND_ARGS=(
        --train-backend megatron
        --load /path/to/${MODEL_NAME}
        --tensor-model-parallel-size 1
        --sequence-parallel
        --pipeline-model-parallel-size 1
        --context-parallel-size 1
        --expert-model-parallel-size 1
        --expert-tensor-parallel-size 1
        --recompute-granularity full
        --recompute-method uniform
        --recompute-num-layers 1
        --use-dynamic-batch-size
        --max-tokens-per-gpu 4096
        --attention-dropout 0.0
        --hidden-dropout 0.0
        --accumulate-allreduce-grads-in-fp32
        --attention-softmax-in-fp32
        --attention-backend flash
        --megatron-to-hf-mode bridge
    )
fi

# Start Ray if not using external Ray
if [ "$USE_EXTERNAL_RAY" = "0" ]; then
    export MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
    export no_proxy="127.0.0.1,${MASTER_ADDR}"
    ray start --head --node-ip-address ${MASTER_ADDR} --num-gpus ${NUM_GPUS} --disable-usage-stats --dashboard-host=0.0.0.0 --dashboard-port=8265
fi

# Build runtime env
RUNTIME_ENV_JSON="{
  \"env_vars\": {
    \"PYTHONPATH\": \"/root/Megatron-LM/\",
    \"CUDA_DEVICE_MAX_CONNECTIONS\": \"1\",
    \"NCCL_NVLS_ENABLE\": \"${HAS_NVLINK}\"
  }
}"

ray job submit --address="http://127.0.0.1:8265" \
    --runtime-env-json="${RUNTIME_ENV_JSON}" \
    -- python3 train.py \
    --actor-num-nodes 1 \
    --actor-num-gpus-per-node ${NUM_GPUS} \
    --multimodal-keys "${MULTIMODAL_KEYS}" \
    ${MODEL_ARGS[@]} \
    ${CKPT_ARGS[@]} \
    ${ROLLOUT_ARGS[@]} \
    ${EVAL_ARGS[@]} \
    ${GRPO_ARGS[@]} \
    ${OPTIMIZER_ARGS[@]} \
    ${SGLANG_ARGS[@]} \
    ${WANDB_ARGS[@]} \
    ${BACKEND_ARGS[@]} \
    ${MISC_ARGS[@]}
```
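One thing we double-checked when diffing against the README config is the rollout/update batch arithmetic. A quick sanity-check sketch (not part of slime; the variable names below are ours, mirroring the flags above):

```shell
# With the values above, one rollout step produces
# rollout-batch-size * n-samples-per-prompt samples; this should be a
# multiple of global-batch-size so each rollout maps to whole optimizer steps.
ROLLOUT_BATCH_SIZE=64
N_SAMPLES_PER_PROMPT=8
GLOBAL_BATCH_SIZE=512
SAMPLES_PER_ROLLOUT=$((ROLLOUT_BATCH_SIZE * N_SAMPLES_PER_PROMPT))
echo "samples per rollout: ${SAMPLES_PER_ROLLOUT}"
# 64 * 8 = 512, so every rollout yields exactly one optimizer step here.
if [ $((SAMPLES_PER_ROLLOUT % GLOBAL_BATCH_SIZE)) -ne 0 ]; then
    echo "warning: rollout samples not divisible by global batch size" >&2
fi
```

If your curve used a different rollout-batch-size or n-samples-per-prompt, that alone could shift where step 400 lands on the eval axis.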