-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvllm_1p7b_8002.log
More file actions
135 lines (135 loc) · 12.9 KB
/
vllm_1p7b_8002.log
File metadata and controls
135 lines (135 loc) · 12.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
INFO 08-26 11:32:46 [__init__.py:235] Automatically detected platform cpu.
WARNING 08-26 11:32:47 [_custom_ops.py:20] Failed to import from vllm._C with ImportError("dlopen(/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/_C.abi3.so, 0x0002): symbol not found in flat namespace '__Z14int8_scaled_mmRN2at6TensorERKS0_S3_S3_S3_RKNSt3__18optionalIS0_EE'")
INFO 08-26 11:32:49 [api_server.py:1755] vLLM API server version 0.10.0
INFO 08-26 11:32:49 [cli_args.py:261] non-default args: {'model_tag': 'Qwen/Qwen3-1.7B', 'port': 8002, 'model': 'Qwen/Qwen3-1.7B', 'dtype': 'float32', 'max_model_len': 2048, 'max_num_batched_tokens': 2048}
INFO 08-26 11:32:57 [config.py:3437] Upcasting torch.bfloat16 to torch.float32.
INFO 08-26 11:32:57 [config.py:1604] Using max model len 2048
INFO 08-26 11:32:57 [arg_utils.py:1030] Chunked prefill is not supported for ARM and POWER CPUs; disabling it for V1 backend.
INFO 08-26 11:33:05 [__init__.py:235] Automatically detected platform cpu.
WARNING 08-26 11:33:07 [_custom_ops.py:20] Failed to import from vllm._C with ImportError("dlopen(/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/_C.abi3.so, 0x0002): symbol not found in flat namespace '__Z14int8_scaled_mmRN2at6TensorERKS0_S3_S3_S3_RKNSt3__18optionalIS0_EE'")
INFO 08-26 11:33:08 [core.py:572] Waiting for init message from front-end.
INFO 08-26 11:33:08 [core.py:71] Initializing a V1 LLM engine (v0.10.0) with config: model='Qwen/Qwen3-1.7B', speculative_config=None, tokenizer='Qwen/Qwen3-1.7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config={}, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float32, max_seq_len=2048, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=True, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cpu, decoding_config=DecodingConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_backend=''), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None), seed=0, served_model_name=Qwen/Qwen3-1.7B, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=False, use_async_output_proc=False, pooler_config=None, compilation_config={"level":2,"debug_dump_path":"","cache_dir":"","backend":"inductor","custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output","vllm.mamba_mixer2"],"use_inductor":true,"compile_sizes":[],"inductor_compile_config":{"enable_auto_functionalized_v2":false,"dce":true,"size_asserts":false,"nan_asserts":false,"epilogue_fusion":true},"inductor_passes":{},"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[],"cudagraph_copy_inputs":false,"full_cuda_graph":false,"max_capture_size":256,"local_cache_dir":null}
INFO 08-26 11:33:08 [importing.py:63] Triton not installed or not compatible; certain GPU-related functions will not be available.
ERROR 08-26 11:33:09 [core.py:632] EngineCore failed to start.
ERROR 08-26 11:33:09 [core.py:632] Traceback (most recent call last):
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 623, in run_engine_core
ERROR 08-26 11:33:09 [core.py:632] engine_core = EngineCoreProc(*args, **kwargs)
ERROR 08-26 11:33:09 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 441, in __init__
ERROR 08-26 11:33:09 [core.py:632] super().__init__(vllm_config, executor_class, log_stats,
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 77, in __init__
ERROR 08-26 11:33:09 [core.py:632] self.model_executor = executor_class(vllm_config)
ERROR 08-26 11:33:09 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 53, in __init__
ERROR 08-26 11:33:09 [core.py:632] self._init_executor()
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 48, in _init_executor
ERROR 08-26 11:33:09 [core.py:632] self.collective_rpc("init_device")
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
ERROR 08-26 11:33:09 [core.py:632] answer = run_method(self.driver_worker, method, args, kwargs)
ERROR 08-26 11:33:09 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/utils/__init__.py", line 2985, in run_method
ERROR 08-26 11:33:09 [core.py:632] return func(*args, **kwargs)
ERROR 08-26 11:33:09 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 603, in init_device
ERROR 08-26 11:33:09 [core.py:632] self.worker.init_device() # type: ignore
ERROR 08-26 11:33:09 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/worker/cpu_worker.py", line 60, in init_device
ERROR 08-26 11:33:09 [core.py:632] ret = torch.ops._C_utils.init_cpu_threads_env(self.local_omp_cpuid)
ERROR 08-26 11:33:09 [core.py:632] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ERROR 08-26 11:33:09 [core.py:632] File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/torch/_ops.py", line 1267, in __getattr__
ERROR 08-26 11:33:09 [core.py:632] raise AttributeError(
ERROR 08-26 11:33:09 [core.py:632] AttributeError: '_OpNamespace' '_C_utils' object has no attribute 'init_cpu_threads_env'
Process EngineCore_0:
Traceback (most recent call last):
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 636, in run_engine_core
raise e
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 623, in run_engine_core
engine_core = EngineCoreProc(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 441, in __init__
super().__init__(vllm_config, executor_class, log_stats,
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 77, in __init__
self.model_executor = executor_class(vllm_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 53, in __init__
self._init_executor()
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 48, in _init_executor
self.collective_rpc("init_device")
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
answer = run_method(self.driver_worker, method, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/utils/__init__.py", line 2985, in run_method
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 603, in init_device
self.worker.init_device() # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/worker/cpu_worker.py", line 60, in init_device
ret = torch.ops._C_utils.init_cpu_threads_env(self.local_omp_cpuid)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/torch/_ops.py", line 1267, in __getattr__
raise AttributeError(
AttributeError: '_OpNamespace' '_C_utils' object has no attribute 'init_cpu_threads_env'
Traceback (most recent call last):
File "/Users/ram/miniforge3/envs/forecaster/bin/vllm", line 8, in <module>
sys.exit(main())
^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/entrypoints/cli/main.py", line 54, in main
args.dispatch_function(args)
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/entrypoints/cli/serve.py", line 52, in cmd
uvloop.run(run_server(args))
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run
return __asyncio.run(
^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/asyncio/runners.py", line 195, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper
return await main
^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 1791, in run_server
await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 1811, in run_server_worker
async with build_async_engine_client(args, client_config) as engine_client:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 158, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/contextlib.py", line 210, in __aenter__
return await anext(self.gen)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 194, in build_async_engine_client_from_engine_args
async_llm = AsyncLLM.from_vllm_config(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 163, in from_vllm_config
return cls(
^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 117, in __init__
self.engine_core = EngineCoreClient.make_async_mp_client(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 98, in make_async_mp_client
return AsyncMPClient(*client_args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 677, in __init__
super().__init__(
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 408, in __init__
with launch_core_engines(vllm_config, executor_class,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/contextlib.py", line 144, in __exit__
next(self.gen)
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/utils.py", line 697, in launch_core_engines
wait_for_engine_startup(
File "/Users/ram/miniforge3/envs/forecaster/lib/python3.12/site-packages/vllm/v1/engine/utils.py", line 750, in wait_for_engine_startup
raise RuntimeError("Engine core initialization failed. "
RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {'EngineCore_0': 1}