From 46aa87209c1cbd432b09669be27df26636f183bc Mon Sep 17 00:00:00 2001 From: Benedict Date: Thu, 9 Oct 2025 16:07:17 +0800 Subject: [PATCH] fix: fix the bug of online serving --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e22705..6bff6f2 100644 --- a/README.md +++ b/README.md @@ -264,6 +264,8 @@ vllm serve \ --gpu-memory-utilization 0.8 \ --host 0.0.0.0 \ --port 8000 \ + --max-num-batched-tokens 80960 \ + --max-model-len 80960 \ --trust-remote-code ``` @@ -358,7 +360,7 @@ def prepare_message_for_vllm(content_messages): for part_message in message_content_list: if 'video' in part_message: video_message = [{'content': [part_message]}] - image_inputs, video_inputs, video_kwargs = process_vision_info(video_message, return_video_kwargs=True) + image_inputs, video_inputs, video_kwargs = process_vision_info(video_message) assert video_inputs is not None, "video_inputs should not be None" video_input = (video_inputs.pop()).permute(0, 2, 3, 1).numpy().astype(np.uint8) fps_list.extend(video_kwargs.get('fps', []))