Skip to content

Commit 697eadc

Browse files
vllm dlc upgrade to 0.12 (#5536)
* test vllm sm version upgrade
* test vllm sm version upgrade
* test vllm ec2 v upgrade
* retry v upgrade on vllm ec2
* retry
* retest sm vllm 0.12
* retest ec2 vllm 0.12
* increase time out
* reinstall nvjpeg
* test nccl in container
* test nccl in container
* test nccl in container
* test nccl in container
* test nccl in container
* increase timeout
* rebuild and retest sm vllm 0.12
* revert toml
1 parent 108a63d commit 697eadc

File tree

5 files changed

+15
-14
lines changed

5 files changed

+15
-14
lines changed

test/dlc_tests/ec2/test_efa.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22

33
import pytest
4+
import time
45

56
import test.test_utils.ec2 as ec2_utils
67
from test.test_utils import (
@@ -38,7 +39,7 @@
3839
WORKER_CONTAINER_NAME = "worker_container"
3940
HOSTS_FILE_LOCATION = "/root/hosts"
4041

41-
DEFAULT_EFA_TIMEOUT = 300
42+
DEFAULT_EFA_TIMEOUT = 1000
4243

4344
EC2_EFA_GPU_INSTANCE_TYPE_AND_REGION = get_efa_ec2_instance_type(
4445
default="p4d.24xlarge",

test/vllm/ec2/test_artifacts/test_ec2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ def test_vllm_on_ec2(resources, image_uri):
393393
head_conn,
394394
f"{EFA_INTEGRATION_TEST_CMD} {HOSTS_FILE_LOCATION} 2",
395395
hide=False,
396-
timeout=DEFAULT_EFA_TIMEOUT,
396+
timeout=1000,
397397
)
398398

399399
test_results["efa"] = True

vllm/buildspec-sm.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
22
prod_account_id: &PROD_ACCOUNT_ID 763104351884
33
region: &REGION <set-$REGION-in-environment>
44
framework: &FRAMEWORK vllm
5-
version: &VERSION "0.11.2"
6-
short_version: &SHORT_VERSION "0.11"
5+
version: &VERSION "0.12.0"
6+
short_version: &SHORT_VERSION "0.12"
77
arch_type: &ARCH_TYPE x86_64
88
autopatch_build: "False"
99

vllm/buildspec.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
22
prod_account_id: &PROD_ACCOUNT_ID 763104351884
33
region: &REGION <set-$REGION-in-environment>
44
framework: &FRAMEWORK vllm
5-
version: &VERSION "0.11.2"
6-
short_version: &SHORT_VERSION "0.11"
5+
version: &VERSION "0.12.0"
6+
short_version: &SHORT_VERSION "0.12"
77
arch_type: &ARCH_TYPE x86_64
88
autopatch_build: "False"
99

vllm/x86_64/gpu/Dockerfile

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM docker.io/vllm/vllm-openai:v0.11.2 as base
1+
FROM docker.io/vllm/vllm-openai:v0.12.0 as base
22
ARG PYTHON="python3"
33
LABEL maintainer="Amazon AI"
44
ARG EFA_VERSION="1.45.1"
@@ -19,7 +19,6 @@ WORKDIR /
1919

2020
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
2121
COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
22-
COPY install_efa.sh install_efa.sh
2322

2423
RUN chmod +x /usr/local/bin/deep_learning_container.py && \
2524
chmod +x /usr/local/bin/bash_telemetry.sh && \
@@ -41,12 +40,13 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \
4140
rm -rf /var/lib/apt/lists/* && \
4241
rm -rf /root/.cache | true
4342

44-
43+
COPY install_efa.sh install_efa.sh
4544
RUN bash install_efa.sh ${EFA_VERSION} && \
46-
rm install_efa.sh && \
47-
mkdir -p /tmp/nvjpeg \
45+
rm install_efa.sh
46+
47+
RUN mkdir -p /tmp/nvjpeg \
4848
&& cd /tmp/nvjpeg \
49-
&& wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
49+
&& curl -O https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
5050
&& tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
5151
&& rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \
5252
&& rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \
@@ -55,8 +55,8 @@ RUN bash install_efa.sh ${EFA_VERSION} && \
5555
&& rm -rf /tmp/nvjpeg \
5656
# remove cuobjdump and nvdisasm
5757
&& rm -rf /usr/local/cuda/bin/cuobjdump* \
58-
&& rm -rf /usr/local/cuda/bin/nvdisasm*
59-
58+
&& rm -rf /usr/local/cuda/bin/nvdisasm* \
59+
&& apt-get install -y git
6060

6161
# ====================== ec2 =========================================
6262
FROM base AS vllm-ec2

0 commit comments

Comments
 (0)