From 54554928855667d0e9ba5bacbc17b9da758b19d8 Mon Sep 17 00:00:00 2001
From: wangzaijun
Date: Sat, 9 May 2026 06:13:04 +0000
Subject: [PATCH 1/2] fix

---
 lightllm/common/basemodel/basemodel.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/lightllm/common/basemodel/basemodel.py b/lightllm/common/basemodel/basemodel.py
index a980ef29a..801880ba8 100755
--- a/lightllm/common/basemodel/basemodel.py
+++ b/lightllm/common/basemodel/basemodel.py
@@ -212,9 +212,20 @@ def _init_kv_move_buffer(self):
 
     def _check_mem_size(self):
         self.max_total_token_num = self.mem_manager.size
+
         assert (
             self.max_total_token_num > self.batch_max_tokens
         ), "max_total_token_num must be greater than batch_max_tokens"
+
+        # 非个人性能模式下,需要保证 max_seq_length 小于等于 max_total_token_num,
+        # 这样才能得到完整的上下文长度的支持。个人模式主要是私有化场景,显卡显存不是
+        # 特别大,可能能分配的 kv 容量有限,无法支持 max_seq_length 的推理。所以个人模式下
+        # 可以适当放宽这个限制,不做这个校验。
+        if self.args.performance_mode != "personal":
+            assert (
+                self.max_seq_length <= self.max_total_token_num
+            ), "max_total_token_num must be greater than max_seq_length"
+
         return
 
     def _init_req_manager(self):

From c25d0f2b86e5d58cad8a0fe9c10acaa288ce5d8c Mon Sep 17 00:00:00 2001
From: wangzaijun
Date: Sat, 9 May 2026 06:22:22 +0000
Subject: [PATCH 2/2] fix

---
 lightllm/common/basemodel/basemodel.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/lightllm/common/basemodel/basemodel.py b/lightllm/common/basemodel/basemodel.py
index 801880ba8..05aaaadca 100755
--- a/lightllm/common/basemodel/basemodel.py
+++ b/lightllm/common/basemodel/basemodel.py
@@ -222,9 +222,12 @@ def _check_mem_size(self):
         # 特别大,可能能分配的 kv 容量有限,无法支持 max_seq_length 的推理。所以个人模式下
         # 可以适当放宽这个限制,不做这个校验。
         if self.args.performance_mode != "personal":
-            assert (
-                self.max_seq_length <= self.max_total_token_num
-            ), "max_total_token_num must be greater than max_seq_length"
+            assert self.max_seq_length <= self.max_total_token_num, (
+                f"max_total_token_num must be >= max_seq_length, "
+                f"got max_total_token_num={self.max_total_token_num}, "
+                f"max_seq_length={self.max_seq_length}. "
+                f"Try set --max_req_total_len a smaller value < {self.max_total_token_num}."
+            )
 
         return
 