Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion test/dlc_tests/ec2/test_efa.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

import pytest
import time

import test.test_utils.ec2 as ec2_utils
from test.test_utils import (
Expand Down Expand Up @@ -38,7 +39,7 @@
WORKER_CONTAINER_NAME = "worker_container"
HOSTS_FILE_LOCATION = "/root/hosts"

DEFAULT_EFA_TIMEOUT = 300
DEFAULT_EFA_TIMEOUT = 1000

EC2_EFA_GPU_INSTANCE_TYPE_AND_REGION = get_efa_ec2_instance_type(
default="p4d.24xlarge",
Expand Down
2 changes: 1 addition & 1 deletion test/vllm/ec2/test_artifacts/test_ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def test_vllm_on_ec2(resources, image_uri):
head_conn,
f"{EFA_INTEGRATION_TEST_CMD} {HOSTS_FILE_LOCATION} 2",
hide=False,
timeout=DEFAULT_EFA_TIMEOUT,
timeout=1000,
)

test_results["efa"] = True
Expand Down
4 changes: 2 additions & 2 deletions vllm/buildspec-sm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK vllm
version: &VERSION "0.11.2"
short_version: &SHORT_VERSION "0.11"
version: &VERSION "0.12.0"
short_version: &SHORT_VERSION "0.12"
arch_type: &ARCH_TYPE x86_64
autopatch_build: "False"

Expand Down
4 changes: 2 additions & 2 deletions vllm/buildspec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
prod_account_id: &PROD_ACCOUNT_ID 763104351884
region: &REGION <set-$REGION-in-environment>
framework: &FRAMEWORK vllm
version: &VERSION "0.11.2"
short_version: &SHORT_VERSION "0.11"
version: &VERSION "0.12.0"
short_version: &SHORT_VERSION "0.12"
arch_type: &ARCH_TYPE x86_64
autopatch_build: "False"

Expand Down
16 changes: 8 additions & 8 deletions vllm/x86_64/gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/vllm/vllm-openai:v0.11.2 as base
FROM docker.io/vllm/vllm-openai:v0.12.0 as base
ARG PYTHON="python3"
LABEL maintainer="Amazon AI"
ARG EFA_VERSION="1.45.1"
Expand All @@ -19,7 +19,6 @@ WORKDIR /

COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
COPY bash_telemetry.sh /usr/local/bin/bash_telemetry.sh
COPY install_efa.sh install_efa.sh

RUN chmod +x /usr/local/bin/deep_learning_container.py && \
chmod +x /usr/local/bin/bash_telemetry.sh && \
Expand All @@ -41,12 +40,13 @@ RUN chmod +x /usr/local/bin/deep_learning_container.py && \
rm -rf /var/lib/apt/lists/* && \
rm -rf /root/.cache | true


COPY install_efa.sh install_efa.sh
RUN bash install_efa.sh ${EFA_VERSION} && \
rm install_efa.sh && \
mkdir -p /tmp/nvjpeg \
rm install_efa.sh

RUN mkdir -p /tmp/nvjpeg \
&& cd /tmp/nvjpeg \
&& wget https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
&& curl -O https://developer.download.nvidia.com/compute/cuda/redist/libnvjpeg/linux-x86_64/libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
&& tar -xvf libnvjpeg-linux-x86_64-12.4.0.76-archive.tar.xz \
&& rm -rf /usr/local/cuda/targets/x86_64-linux/lib/libnvjpeg* \
&& rm -rf /usr/local/cuda/targets/x86_64-linux/include/nvjpeg.h \
Expand All @@ -55,8 +55,8 @@ RUN bash install_efa.sh ${EFA_VERSION} && \
&& rm -rf /tmp/nvjpeg \
# remove cuobjdump and nvdisasm
&& rm -rf /usr/local/cuda/bin/cuobjdump* \
&& rm -rf /usr/local/cuda/bin/nvdisasm*

&& rm -rf /usr/local/cuda/bin/nvdisasm* \
&& apt-get install -y git

# ====================== ec2 =========================================
FROM base AS vllm-ec2
Expand Down