From ade56d514a039b1b0837a5c26588bd950b7ac652 Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 11:22:03 -0800 Subject: [PATCH 01/17] test vllm sm version upgrade --- dlc_developer_config.toml | 2 +- vllm/buildspec-sm.yml | 4 ++-- vllm/x86_64/gpu/Dockerfile | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 2ddfe8ccb932..bfc057d88ca3 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -184,7 +184,7 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "" +dlc-pr-vllm = "vllm/buildspec-sm.yml" # sglang dlc-pr-sglang = "" \ No newline at end of file diff --git a/vllm/buildspec-sm.yml b/vllm/buildspec-sm.yml index a4c839d9def5..503f94147389 100644 --- a/vllm/buildspec-sm.yml +++ b/vllm/buildspec-sm.yml @@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID prod_account_id: &PROD_ACCOUNT_ID 763104351884 region: ®ION framework: &FRAMEWORK vllm -version: &VERSION "0.11.2" -short_version: &SHORT_VERSION "0.11" +version: &VERSION "0.12.0" +short_version: &SHORT_VERSION "0.12" arch_type: &ARCH_TYPE x86_64 autopatch_build: "False" diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index 6e17ff62fcca..0df3091422ca 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -1,7 +1,7 @@ -FROM docker.io/vllm/vllm-openai:v0.11.2 as base +FROM docker.io/vllm/vllm-openai:v0.12.0 as base ARG PYTHON="python3" LABEL maintainer="Amazon AI" -ARG EFA_VERSION="1.43.3" +ARG EFA_VERSION="1.45.1" LABEL dlc_major_version="1" ENV DEBIAN_FRONTEND=noninteractive \ LANG=C.UTF-8 \ From 18a7ff0173a97545b0d6a2403fbe4baa114d965b Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 11:22:35 -0800 Subject: [PATCH 02/17] test vllm sm version upgrade --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index bfc057d88ca3..bcf371747eb4 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] +build_frameworks = ["vllm"] # By default we build both training and inference containers. Set true/false values to determine which to build. From 1255cdd3b7a45f418fa0f4a859e1912711902ff3 Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 11:25:40 -0800 Subject: [PATCH 03/17] test vllm ec2 v upgrade --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index bcf371747eb4..8b7c7ec49b17 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -184,7 +184,7 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec-sm.yml" +dlc-pr-vllm = "vllm/buildspec.yml" # sglang dlc-pr-sglang = "" \ No newline at end of file From 23017168e91810ab299a12899f328c441cb386ca Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 11:39:34 -0800 Subject: [PATCH 04/17] retry v upgrade on vllm ec2 --- vllm/x86_64/gpu/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index 0df3091422ca..36bf16970867 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -46,6 +46,7 @@ RUN bash install_efa.sh ${EFA_VERSION} && \ rm install_efa.sh && \ mkdir -p /tmp/nvjpeg \ && cd /tmp/nvjpeg \ + && apt-get install -y wget \ && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ From db750d4a24f453cbc95decf746da8ef0bc962e2b Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 11:47:25 -0800 Subject: [PATCH 05/17] retry --- vllm/x86_64/gpu/Dockerfile | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index 36bf16970867..affc4ac7b326 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -44,19 +44,17 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \ RUN bash install_efa.sh ${EFA_VERSION} && \ rm install_efa.sh && \ - mkdir -p /tmp/nvjpeg \ - && cd /tmp/nvjpeg \ - && apt-get install -y wget \ - && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ - && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ - && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ - && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ - && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ - && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ - && rm -rf /tmp/nvjpeg \ - # remove cuobjdump and nvdisasm - && rm -rf /usr/local/cuda/bin/cuobjdump* \ - && rm -rf /usr/local/cuda/bin/nvdisasm* + mkdir -p /tmp/nvjpeg && \ + cd /tmp/nvjpeg && \ + curl -O https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz && \ + tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz && \ + rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* && \ + rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h && \ + cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ && \ + cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ && \ + rm -rf /tmp/nvjpeg && \ + rm -rf /usr/local/cuda/bin/cuobjdump* && \ + rm -rf /usr/local/cuda/bin/nvdisasm* # ====================== ec2 ========================================= From 383f1a35fe36d2d1eab59d3fb9976413520325c1 Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 12:22:31 -0800 Subject: [PATCH 06/17] retest sm vllm 0.12 --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 8b7c7ec49b17..bcf371747eb4 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -184,7 +184,7 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec.yml" +dlc-pr-vllm = "vllm/buildspec-sm.yml" # sglang dlc-pr-sglang = "" \ No newline at end of file From 81115e96fb0810cee25ec9550d6b07e7fc5c9a3c Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 12:45:07 -0800 Subject: [PATCH 07/17] retest ec2 vllm 0.12 --- dlc_developer_config.toml | 2 +- vllm/buildspec.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index bcf371747eb4..8b7c7ec49b17 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -184,7 +184,7 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec-sm.yml" +dlc-pr-vllm = "vllm/buildspec.yml" # sglang dlc-pr-sglang = "" \ No newline at end of file diff --git a/vllm/buildspec.yml b/vllm/buildspec.yml index f30a5e0080c0..9e5bf6ef9fe7 100644 --- a/vllm/buildspec.yml +++ b/vllm/buildspec.yml @@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID prod_account_id: &PROD_ACCOUNT_ID 763104351884 region: ®ION framework: &FRAMEWORK vllm -version: &VERSION "0.11.2" -short_version: &SHORT_VERSION "0.11" +version: &VERSION "0.12.0" +short_version: &SHORT_VERSION "0.12" arch_type: &ARCH_TYPE x86_64 autopatch_build: "False" From 49d08eef109187f1f4f45f4dc5dc96c7ee35babb Mon Sep 17 00:00:00 2001 From: jkottu Date: Fri, 5 Dec 2025 16:34:02 -0800 Subject: [PATCH 08/17] increase time out --- test/vllm/ec2/test_artifacts/test_ec2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/vllm/ec2/test_artifacts/test_ec2.py b/test/vllm/ec2/test_artifacts/test_ec2.py index 9b8cff730ebd..400bd4abe1e4 100644 --- a/test/vllm/ec2/test_artifacts/test_ec2.py +++ b/test/vllm/ec2/test_artifacts/test_ec2.py @@ -393,7 +393,7 @@ def test_vllm_on_ec2(resources, image_uri): head_conn, f"{EFA_INTEGRATION_TEST_CMD} {HOSTS_FILE_LOCATION} 2", hide=False, - timeout=DEFAULT_EFA_TIMEOUT, + timeout=500, ) test_results["efa"] = True From bcf4ce68d827a805cf194b44e7cdc7be85e73dad Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 09:38:38 -0800 Subject: [PATCH 09/17] reinstall nvjpeg --- vllm/x86_64/gpu/Dockerfile | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index affc4ac7b326..eea0102b0c38 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -42,19 +42,21 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \ rm -rf /root/.cache | true -RUN bash install_efa.sh ${EFA_VERSION} && \ +RUN apt-get install -y wget && \ + bash install_efa.sh ${EFA_VERSION} & \ rm install_efa.sh && \ - mkdir -p /tmp/nvjpeg && \ - cd /tmp/nvjpeg && \ - curl -O https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz && \ - tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz && \ - rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* && \ - rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h && \ - cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ && \ - cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ && \ - rm -rf /tmp/nvjpeg && \ - rm -rf /usr/local/cuda/bin/cuobjdump* && \ - rm -rf /usr/local/cuda/bin/nvdisasm* + mkdir -p /tmp/nvjpeg \ + && cd /tmp/nvjpeg \ + && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ + && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/lib/libnvjpeg* /usr/local/cuda/targets/x86_64-linux/lib/ \ + && cp libnvjpeg-linux-x86_64-12.4.0.76-archive/include/* /usr/local/cuda/targets/x86_64-linux/include/ \ + && rm -rf /tmp/nvjpeg \ + # remove cuobjdump and nvdisasm + && rm -rf /usr/local/cuda/bin/cuobjdump* \ + && rm -rf /usr/local/cuda/bin/nvdisasm* # ====================== ec2 ========================================= From 19d4aef2fa8196affcdebb744a05658ab4be3c69 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 09:50:28 -0800 Subject: [PATCH 10/17] test nccl in container --- test/dlc_tests/ec2/test_efa.py | 2 ++ vllm/x86_64/gpu/Dockerfile | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/test/dlc_tests/ec2/test_efa.py b/test/dlc_tests/ec2/test_efa.py index 8398cd1a9204..eb33d871b370 100644 --- a/test/dlc_tests/ec2/test_efa.py +++ b/test/dlc_tests/ec2/test_efa.py @@ -1,6 +1,7 @@ import os import pytest +import time import test.test_utils.ec2 as ec2_utils from test.test_utils import ( @@ -214,6 +215,7 @@ def _setup_multinode_efa_instances( build_all_reduce_perf_promises = [] # Run container _setup_container(master_connection, image, MASTER_CONTAINER_NAME) + time.sleep(3000) # Build all_reduce_perf binary using nccl-tests promise = run_cmd_on_container( MASTER_CONTAINER_NAME, diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index eea0102b0c38..752170900af8 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -42,12 +42,11 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \ rm -rf /root/.cache | true -RUN apt-get install -y wget && \ - bash install_efa.sh ${EFA_VERSION} & \ +RUN bash install_efa.sh ${EFA_VERSION} & \ rm install_efa.sh && \ mkdir -p /tmp/nvjpeg \ && cd /tmp/nvjpeg \ - && wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ + && curl -O https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \ && rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \ From 197347274f9010593206780372d53a770436dbee Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 10:22:41 -0800 Subject: [PATCH 11/17] test nccl in container --- vllm/x86_64/gpu/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index 752170900af8..7a2ee330f0c6 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -55,8 +55,8 @@ RUN bash install_efa.sh ${EFA_VERSION} & \ && rm -rf /tmp/nvjpeg \ # remove cuobjdump and nvdisasm && rm -rf /usr/local/cuda/bin/cuobjdump* \ - && rm -rf /usr/local/cuda/bin/nvdisasm* - + && rm -rf /usr/local/cuda/bin/nvdisasm* \ + && apt-get install -y git # ====================== ec2 ========================================= FROM base AS vllm-ec2 From 3169cb02d7b8d04fd17d8ae50a01224269435e09 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 10:22:48 -0800 Subject: [PATCH 12/17] test nccl in container --- test/dlc_tests/ec2/test_efa.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/dlc_tests/ec2/test_efa.py b/test/dlc_tests/ec2/test_efa.py index eb33d871b370..fd1977876c7e 100644 --- a/test/dlc_tests/ec2/test_efa.py +++ b/test/dlc_tests/ec2/test_efa.py @@ -215,7 +215,6 @@ def _setup_multinode_efa_instances( build_all_reduce_perf_promises = [] # Run container _setup_container(master_connection, image, MASTER_CONTAINER_NAME) - time.sleep(3000) # Build all_reduce_perf binary using nccl-tests promise = run_cmd_on_container( MASTER_CONTAINER_NAME, From 8d3f4f5db2ef5361b6ff4483fa64ebddad16c137 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 12:15:43 -0800 Subject: [PATCH 13/17] test nccl in container --- vllm/x86_64/gpu/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index 7a2ee330f0c6..1162047deee0 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -19,7 +19,6 @@ WORKDIR / COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh -COPY install_efa.sh install_efa.sh RUN chmod +x /usr/local/bin/deep_learning_container.py && \ chmod +x /usr/local/bin/bash_telemetry.sh && \ @@ -41,10 +40,11 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \ rm -rf /var/lib/apt/lists/* && \ rm -rf /root/.cache | true - +COPY install_efa.sh install_efa.sh RUN bash install_efa.sh ${EFA_VERSION} & \ - rm install_efa.sh && \ - mkdir -p /tmp/nvjpeg \ + rm install_efa.sh + +RUN mkdir -p /tmp/nvjpeg \ && cd /tmp/nvjpeg \ && curl -O https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ && tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \ From b902ca56dbf0412428be1b02a5c8e7bd4b8edc6f Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 12:21:36 -0800 Subject: [PATCH 14/17] test nccl in container --- vllm/x86_64/gpu/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/x86_64/gpu/Dockerfile b/vllm/x86_64/gpu/Dockerfile index 1162047deee0..d79a26eab958 100644 --- a/vllm/x86_64/gpu/Dockerfile +++ b/vllm/x86_64/gpu/Dockerfile @@ -41,7 +41,7 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \ rm -rf /root/.cache | true COPY install_efa.sh install_efa.sh -RUN bash install_efa.sh ${EFA_VERSION} & \ +RUN bash install_efa.sh ${EFA_VERSION} && \ rm install_efa.sh RUN mkdir -p /tmp/nvjpeg \ From 21a326b60a274fe3ceaa105108b13a8e87c3a562 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 12:59:02 -0800 Subject: [PATCH 15/17] increase timeout --- test/dlc_tests/ec2/test_efa.py | 2 +- test/vllm/ec2/test_artifacts/test_ec2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/dlc_tests/ec2/test_efa.py b/test/dlc_tests/ec2/test_efa.py index fd1977876c7e..a54eef1066df 100644 --- a/test/dlc_tests/ec2/test_efa.py +++ b/test/dlc_tests/ec2/test_efa.py @@ -39,7 +39,7 @@ WORKER_CONTAINER_NAME = "worker_container" HOSTS_FILE_LOCATION = "/root/hosts" -DEFAULT_EFA_TIMEOUT = 300 +DEFAULT_EFA_TIMEOUT = 1000 EC2_EFA_GPU_INSTANCE_TYPE_AND_REGION = get_efa_ec2_instance_type( default="p4d.24xlarge", diff --git a/test/vllm/ec2/test_artifacts/test_ec2.py b/test/vllm/ec2/test_artifacts/test_ec2.py index 400bd4abe1e4..add2b895d282 100644 --- a/test/vllm/ec2/test_artifacts/test_ec2.py +++ b/test/vllm/ec2/test_artifacts/test_ec2.py @@ -393,7 +393,7 @@ def test_vllm_on_ec2(resources, image_uri): head_conn, f"{EFA_INTEGRATION_TEST_CMD} {HOSTS_FILE_LOCATION} 2", hide=False, - timeout=500, + timeout=1000, ) test_results["efa"] = True From fc60bbf3b13f9e330dcc625c3a7d2145f5cb5212 Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 13:29:11 -0800 Subject: [PATCH 16/17] rebuild and retest sm vllm 0.12 --- dlc_developer_config.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 8b7c7ec49b17..bcf371747eb4 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -184,7 +184,7 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec.yml" +dlc-pr-vllm = "vllm/buildspec-sm.yml" # sglang dlc-pr-sglang = "" \ No newline at end of file From 243fc1fbcc82c669f64f35a7112b2714be6d5e5c Mon Sep 17 00:00:00 2001 From: jkottu Date: Mon, 8 Dec 2025 15:06:41 -0800 Subject: [PATCH 17/17] revert toml --- dlc_developer_config.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index bcf371747eb4..2ddfe8ccb932 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -37,7 +37,7 @@ deep_canary_mode = false [build] # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. # available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = ["vllm"] +build_frameworks = [] # By default we build both training and inference containers. Set true/false values to determine which to build. @@ -184,7 +184,7 @@ dlc-pr-pytorch-eia-inference = "" dlc-pr-tensorflow-2-eia-inference = "" # vllm -dlc-pr-vllm = "vllm/buildspec-sm.yml" +dlc-pr-vllm = "" # sglang dlc-pr-sglang = "" \ No newline at end of file