From 9e1a8c3fd5dd07404bddb9af2a628b5c8ad59f37 Mon Sep 17 00:00:00 2001
From: prernanookala-ai
Date: Thu, 29 Jan 2026 15:23:54 -0600
Subject: [PATCH 1/2] Add parameters for Qwen2.5-vl-7b-instruct model

Signed-off-by: prernanookala-ai
---
Reviewer notes (text between "---" and the diff is not part of the commit):
- This series was recovered from a copy whose line breaks and indentation
  had been collapsed. Leading whitespace on the diff lines below is
  reconstructed using a 2-space YAML layout -- TODO confirm against the
  target file before applying.
- Hunk 1 adds a per-model entry under modelConfigs; hunk 2 only adds the
  missing newline at end of file.

 core/helm-charts/vllm/xeon-values.yaml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/core/helm-charts/vllm/xeon-values.yaml b/core/helm-charts/vllm/xeon-values.yaml
index 00cb6384..7513d317 100644
--- a/core/helm-charts/vllm/xeon-values.yaml
+++ b/core/helm-charts/vllm/xeon-values.yaml
@@ -243,6 +243,12 @@ modelConfigs:
     tensor_parallel_size: "{{ .Values.tensor_parallel_size }}"
     pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}"
 
+  "Qwen/Qwen2.5-VL-7B-Instruct":
+    configMapValues:
+        VLLM_SKIP_WARMUP: true
+    extraCmdArgs: ["--max-model-len","33024"]
+    tensor_parallel_size: "1"
+
 defaultModelConfigs:
   configMapValues:
     VLLM_CPU_KVCACHE_SPACE: "40"
@@ -270,4 +276,4 @@ defaultModelConfigs:
       "256",
     ]
   tensor_parallel_size: "{{ .Values.tensor_parallel_size }}"
-  pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}"
\ No newline at end of file
+  pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}"

From 8c889ddcc5e847e38b0be97a09edfa04cd35687e Mon Sep 17 00:00:00 2001
From: prernanookala-ai
Date: Fri, 30 Jan 2026 18:06:35 -0600
Subject: [PATCH 2/2] Update parameters for Qwen2.5-VL per review

Signed-off-by: prernanookala-ai
---
Reviewer notes (text between "---" and the diff is not part of the commit):
- Replaces the short Qwen2.5-VL entry from PATCH 1/2 with a full set of
  configMapValues and extraCmdArgs, and switches tensor/pipeline parallel
  sizes from the literal "1" to the templated .Values lookups.
- VLLM_SKIP_WARMUP is now written as the string "true" so that it is a
  quoted string like every other value in this configMapValues map; an
  unquoted true is parsed by YAML as a boolean. NOTE(review): how the
  chart template renders these values is not visible here -- confirm.
- Because of that one-token change, the post-image blob id on the "index"
  line below predates this edit; it is informational for a plain apply.
- Leading whitespace is reconstructed (see PATCH 1/2 notes); the indent of
  the removed VLLM_SKIP_WARMUP line is assumed -- TODO confirm.

 core/helm-charts/vllm/xeon-values.yaml | 30 +++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/core/helm-charts/vllm/xeon-values.yaml b/core/helm-charts/vllm/xeon-values.yaml
index 7513d317..499e46ae 100644
--- a/core/helm-charts/vllm/xeon-values.yaml
+++ b/core/helm-charts/vllm/xeon-values.yaml
@@ -245,9 +245,33 @@ modelConfigs:
 
   "Qwen/Qwen2.5-VL-7B-Instruct":
     configMapValues:
-        VLLM_SKIP_WARMUP: true
-    extraCmdArgs: ["--max-model-len","33024"]
-    tensor_parallel_size: "1"
+      VLLM_SKIP_WARMUP: "true"
+      VLLM_CPU_KVCACHE_SPACE: "40"
+      VLLM_RPC_TIMEOUT: "100000"
+      VLLM_ALLOW_LONG_MAX_MODEL_LEN: "1"
+      VLLM_ENGINE_ITERATION_TIMEOUT_S: "120"
+      VLLM_CPU_NUM_OF_RESERVED_CPU: "0"
+      VLLM_CPU_SGL_KERNEL: "1"
+      HF_HUB_DISABLE_XET: "1"
+    extraCmdArgs:
+      [
+        "--block-size",
+        "128",
+        "--dtype",
+        "bfloat16",
+        "--distributed_executor_backend",
+        "mp",
+        "--enable_chunked_prefill",
+        "--enforce-eager",
+        "--max-model-len",
+        "33024",
+        "--max-num-batched-tokens",
+        "2048",
+        "--max-num-seqs",
+        "256",
+      ]
+    tensor_parallel_size: "{{ .Values.tensor_parallel_size }}"
+    pipeline_parallel_size: "{{ .Values.pipeline_parallel_size }}"
 
 defaultModelConfigs:
   configMapValues: