From 46aa87209c1cbd432b09669be27df26636f183bc Mon Sep 17 00:00:00 2001
From: Benedict <haoyang.byte@gmail.com>
Date: Thu, 9 Oct 2025 16:07:17 +0800
Subject: [PATCH] fix: correct the online serving example in README

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8e22705..6bff6f2 100644
--- a/README.md
+++ b/README.md
@@ -264,6 +264,8 @@ vllm serve \
     --gpu-memory-utilization 0.8 \
     --host 0.0.0.0 \
     --port 8000 \
+    --max-num-batched-tokens 80960 \
+    --max-model-len 80960 \
     --trust-remote-code
 ```
 
@@ -358,7 +360,7 @@ def prepare_message_for_vllm(content_messages):
         for part_message in message_content_list:
             if 'video' in part_message:
                 video_message = [{'content': [part_message]}]
-                image_inputs, video_inputs, video_kwargs = process_vision_info(video_message, return_video_kwargs=True)
+                image_inputs, video_inputs, video_kwargs = process_vision_info(video_message)
                 assert video_inputs is not None, "video_inputs should not be None"
                 video_input = (video_inputs.pop()).permute(0, 2, 3, 1).numpy().astype(np.uint8)
                 fps_list.extend(video_kwargs.get('fps', []))