This is the official repository for our paper: Enhancing GUI Agent with Uncertainty-Aware Self-Trained Evaluator [NeurIPS 2025].
This project is based on EasyR1 and LLaMA-Factory.
| Method | Bits | 1.5B | 3B | 7B | 32B | 72B |
|---|---|---|---|---|---|---|
| GRPO Full Fine-Tuning | AMP | 2*24GB | 4*40GB | 8*40GB | 16*80GB | 32*80GB |
| GRPO Full Fine-Tuning | BF16 | 1*24GB | 1*40GB | 4*40GB | 8*80GB | 16*80GB |
> **Note**: Use `worker.actor.fsdp.torch_dtype=bf16` and `worker.actor.optim.strategy=adamw_bf16` to enable bf16 training.
This project currently requires two separate environments for SFT and RL training stages.
# Create environment for SFT
conda create --name urst_sft python=3.10
conda activate urst_sft
# Install LLaMA-Factory dependencies
cd LLaMA-Factory
pip install -e .
cd ..

Modify `/EasyR1/LLaMA-Factory/examples/train_full/qwen2_5_vl_evaluator_full_sft.yaml`:
model_name_or_path: /path/to/your/base/model # e.g., Qwen/Qwen2.5-VL-3B-Instruct
output_dir: /path/to/save/sft/model # e.g., ./checkpoints/sft_round_init
train_img_dir: /path/to/your/train_images
# Ensure you are in the project root (URST/EasyR1)
python ./verl/trainer/sft.py \
--train_files /path/to/your/train_data.json \
--project_name easyr1 \
--experiment_name my_experiment

Then evaluate the SFT model:

python verl/trainer/eval_sft.py --model_path /path/to/save/sft/model
Note: This stage requires a different environment based on EasyR1/veRL.
conda deactivate
conda create --name urst_rl python=3.9
conda activate urst_rl
# Install EasyR1 dependencies
pip install -e .

Modify `/URST/EasyR1/examples/config.yaml`:
data:
val_files: /path/to/your/test_data.json
image_dir: /path/to/your/test_images
image_train_dir: /path/to/your/train_images
trainer:
save_checkpoint_path: /path/to/save/rl/checkpoints
load_checkpoint_path: /path/to/save/sft/model # Path to the SFT model from Stage 1
export CUDA_VISIBLE_DEVICES=4,5,6,7
MODEL_PATH=/path/to/your/model  # checkpoint saved from the SFT stage
CHECKPOINT_PATH=/path/to/checkpoint/saves/easy_r1/experiment_name/SGPO_models_v1
# Note: trainer.n_gpus_per_node must match the number of GPUs in CUDA_VISIBLE_DEVICES.
# (A comment cannot follow a line-continuation backslash, so it is placed here instead.)
data.train_files=/path/to/data/train_data/processed_candidate_data_labelled_list.json \
trainer.n_gpus_per_node=4 \
bash ./examples/qwen2_5_vl_3b_urst_sgpo.sh
# Define paths (Please modify these paths to match your environment)
CONFIG_PATH="examples/config.yaml"
# Path to the tokenizer/processor (usually the SFT model path)
TOKENIZER_PATH="/share/urst_data/saves/easy_r1/qwen2_5_vl_3b_urst_sgpo_para_from_code/SFT_models/round_init"
# Base directory for test data and images
TEST_DATA_DIR="/share/urst_data/data/test_data"
# Path to the format prompt jinja file
FORMAT_PROMPT_PATH="examples/format_prompt/urst.jinja"
# Directory to save evaluation results
OUTPUT_DIR="output/eval_results"

Then run the evaluation:

bash ./examples/test.sh