Skip to content

docker container error #40

@runzeer

Description

@runzeer

set -x

MODEL_PATH=Qwen/Qwen2.5-VL-3B-Instruct  # replace it with your local file path

SYSTEM_PROMPT=""""""

python3 -m verl.trainer.main \
    config=examples/config.yaml \
    data.train_files=datasets/GUI-R1/train.parquet \
    data.val_files=datasets/GUI-R1/test.parquet \
    data.system_prompt="${SYSTEM_PROMPT}" \
    worker.actor.model.model_path=${MODEL_PATH} \
    worker.rollout.tensor_parallel_size=1 \
    worker.rollout.enable_chunked_prefill=false \
    worker.reward.compute_score=r1gui \
    trainer.experiment_name=qwen2_5_vl_3b_guir1_grpo \
    trainer.n_gpus_per_node=8 \
    data.max_pixels=1258291 \
    data.max_prompt_length=2048 \
    data.max_response_length=1024 \
    data.val_batch_size=256

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/workspace/GUI-R1/verl/trainer/main.py", line 109, in <module>
    main()
  File "/workspace/GUI-R1/verl/trainer/main.py", line 105, in main
    ray.get(runner.run.remote(ppo_config))
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py", line 2755, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
  File "/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py", line 906, in get_objects
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(NotImplementedError): ray::Runner.run() (pid=8822, ip=172.17.0.2, actor_id=26a989c7b7e863bf82d8bd9701000000, repr=<__main__.Runner object at 0x7fba5b1021a0>)
  File "/workspace/GUI-R1/verl/trainer/main.py", line 84, in run
    trainer.init_workers()
  File "/workspace/GUI-R1/verl/trainer/ray_trainer.py", line 459, in init_workers
    self.ref_policy_wg.init_model()
  File "/workspace/GUI-R1/verl/single_controller/ray/base.py", line 47, in func
    output = ray.get(output)
ray.exceptions.RayTaskError(NotImplementedError): ray::WorkerDict.ref_init_model() (pid=9477, ip=172.17.0.2, actor_id=862252fcfa0a702017bb208d01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7fc6b4d26b60>)
  File "/workspace/GUI-R1/verl/single_controller/ray/base.py", line 432, in func
    return getattr(self.worker_dict[key], name)(*args, **kwargs)
  File "/workspace/GUI-R1/verl/single_controller/base/decorator.py", line 207, in inner
    return func(*args, **kwargs)
  File "/workspace/GUI-R1/verl/workers/fsdp_workers.py", line 349, in init_model
    self._build_model_optimizer(
  File "/workspace/GUI-R1/verl/workers/fsdp_workers.py", line 195, in _build_model_optimizer
    model = auto_class.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py", line 564, in from_pretrained
    return model_class.from_pretrained(
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 262, in _wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 4397, in from_pretrained
    dispatch_model(model, **device_map_kwargs)
  File "/usr/local/lib/python3.10/dist-packages/accelerate/big_modeling.py", line 496, in dispatch_model
    model.to(device)
  File "/usr/local/lib/python3.10/dist-packages/transformers/modeling_utils.py", line 3162, in to
    return super().to(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1340, in to
    return self._apply(convert)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 900, in _apply
    module._apply(fn)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 927, in _apply
    param_applied = fn(param)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1333, in convert
    raise NotImplementedError(
NotImplementedError: Cannot copy out of meta tensor; no data! Please use torch.nn.Module.to_empty() instead of torch.nn.Module.to() when moving module from meta to a different device.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions