Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 246 additions & 0 deletions vllm/inference/0.13.0/Dockerfile.neuronx
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
# Build-stage selector for the final image: "prod" installs pinned Neuron
# packages, "repo" installs the latest versions from the repos.
ARG BUILD_STAGE=prod

FROM public.ecr.aws/docker/library/ubuntu:24.04 AS base

LABEL dlc_major_version="1"
LABEL maintainer="Amazon AI"

# Build-time only knobs; ARG (not ENV) so they do not leak into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive
ARG PIP=pip3
ARG PYTHON=python3.12
ARG PYTHON_VERSION=3.12.11
ARG TORCHSERVE_VERSION=0.11.0
ARG PYPI_SIMPLE_URL="https://pypi.org/simple/"


# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# The ubuntu:24.04 base image does not define LD_LIBRARY_PATH, so appending
# ":$LD_LIBRARY_PATH" expanded to nothing and tripped BuildKit's UndefinedVar
# check (see CI warning). Set the full value explicitly instead.
ENV LD_LIBRARY_PATH=/opt/aws/neuron/lib:/lib/x86_64-linux-gnu:/opt/conda/lib/

Check warning on line 18 in vllm/inference/0.13.0/Dockerfile.neuronx

View workflow job for this annotation

GitHub Actions / build (0.13.0, vllm/inference/0.13.0)

Variables should be defined before their use

UndefinedVar: Usage of undefined variable '$LD_LIBRARY_PATH' More info: https://docs.docker.com/go/dockerfile/rule/undefined-var/
# /opt/conda first so the conda-provided python/pip shadow the system ones;
# unlike LD_LIBRARY_PATH above, $PATH is always defined in the base image,
# so this expansion is safe.
ENV PATH=/opt/conda/bin:/opt/aws/neuron/bin:$PATH

# OS-level build and runtime dependencies, installed and cleaned up in a
# single layer so the apt cache never persists in the image.
# NOTE(review): packages are unpinned and a blanket `apt-get upgrade` is run
# (hadolint DL3005/DL3008 would flag both); this matches AWS DLC convention
# of pulling latest security patches at build time — confirm before pinning.
RUN apt-get update \
&& apt-get upgrade -y \
&& apt-get install -y --no-install-recommends \
apt-transport-https \
build-essential \
ca-certificates \
cmake \
curl \
emacs \
ffmpeg \
gcc \
git \
gnupg2 \
gpg-agent \
jq \
libgl1 \
libgl1-mesa-dri \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
libcap-dev \
libhwloc-dev \
openssh-client \
openjdk-11-jdk \
unzip \
vim \
wget \
zlib1g-dev \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/tmp* \
&& apt-get clean


# Rebuild the Java cacerts keystore in JKS format so OpenJDK can read it;
# workaround for the issue/PR linked below.
# https://github.com/docker-library/openjdk/issues/261 https://github.com/docker-library/openjdk/pull/263/files
# NOTE(review): steps are joined with ';' (not '&&'), so an individual
# failure here does not fail the build — presumably deliberate best-effort
# per the upstream workaround; confirm before tightening to '&&'.
RUN keytool -importkeystore -srckeystore /etc/ssl/certs/java/cacerts -destkeystore /etc/ssl/certs/java/cacerts.jks -deststoretype JKS -srcstorepass changeit -deststorepass changeit -noprompt; \
mv /etc/ssl/certs/java/cacerts.jks /etc/ssl/certs/java/cacerts; \
/var/lib/dpkg/info/ca-certificates-java.postinst configure;

# Install Miniforge into /opt/conda and the base Python toolchain via mamba;
# the installer script is removed and caches cleaned in the same layer.
# NOTE(review): the Miniforge installer is fetched from the "latest" release
# without a checksum, so this layer is not reproducible — consider pinning a
# release tag and verifying a sha256.
RUN curl -L -o ~/miniforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh \
&& chmod +x ~/miniforge.sh \
&& ~/miniforge.sh -b -p /opt/conda \
&& rm ~/miniforge.sh \
&& /opt/conda/bin/conda update -y conda \
&& /opt/conda/bin/mamba install -c conda-forge -y \
python=$PYTHON_VERSION \
pyopenssl \
cython \
mkl-include \
mkl \
parso \
typing \
# Below 2 are included in miniconda base, but not mamba so need to install
conda-content-trust \
charset-normalizer \
&& /opt/conda/bin/conda clean -ya

# Additional scientific-stack packages via mamba, then bootstrap pip inside
# the conda env; the /usr/local/bin/pip3 symlink makes the $PIP build-arg
# (pip3) resolve to conda's pip everywhere else in this file.
RUN /opt/conda/bin/mamba install -c conda-forge \
python=$PYTHON_VERSION \
scikit-learn \
h5py \
requests \
&& conda clean -ya \
&& pip install --upgrade pip \
--trusted-host pypi.org --trusted-host files.pythonhosted.org \
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
&& pip install \
enum-compat \
ipython \
&& rm -rf ~/.cache/pip/*

# Install EFA
# Downloads the EFA installer, GPG-verifies the tarball signature, then runs
# it without kernel-module or limits changes (container-safe flags).
# NOTE(review): the installer's own --no-verify is acceptable only because
# the tarball signature is checked manually with gpg above; the
# `cat … | gpg --fingerprint` line only prints the fingerprint for the build
# log — it does not verify anything.
RUN apt-get update \
&& cd $HOME \
&& curl -O https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz \
&& wget https://efa-installer.amazonaws.com/aws-efa-installer.key && gpg --import aws-efa-installer.key \
&& cat aws-efa-installer.key | gpg --fingerprint \
&& wget https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz.sig && gpg --verify ./aws-efa-installer-latest.tar.gz.sig \
&& tar -xf aws-efa-installer-latest.tar.gz \
&& cd aws-efa-installer \
&& ./efa_installer.sh -y -g --skip-kmod --skip-limit-conf --no-verify \
&& cd $HOME \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/tmp* \
&& apt-get clean

# Entry scripts; --chmod avoids a separate chmod layer.
COPY --chmod=755 vllm_entrypoint.py neuron-monitor.sh deep_learning_container.py /usr/local/bin/

### Mount Point ###
# When launching the container, mount the code directory to /workspace
ARG APP_MOUNT=/workspace
# Plain (non-JSON) form: the previous `VOLUME [ ${APP_MOUNT} ]` is not valid
# JSON, so the parser fell back to the space-separated list form and declared
# bogus volumes literally named "[" and "]" alongside /workspace.
VOLUME ${APP_MOUNT}
WORKDIR ${APP_MOUNT}/vllm

# Python tooling plus torchserve/torch-model-archiver pinned to
# $TORCHSERVE_VERSION; --no-cache-dir plus the explicit cache rm keep the
# layer small.
# NOTE(review): the quoted packages are unpinned (hadolint DL3013) — accepted
# here per DLC convention, but pinning would make builds reproducible.
RUN ${PIP} install --no-cache-dir -U \
"opencv-python" \
"awscli" \
"pandas" \
"boto3" \
"cryptography" \
"pytest" \
"wheel" \
"cmake>=3.26" \
"setuptools-scm>=8" \
"jinja2" \
torchserve==${TORCHSERVE_VERSION} \
torch-model-archiver==${TORCHSERVE_VERSION} \
&& rm -rf ~/.cache/pip/*

# Unprivileged account and state/model directories — presumably consumed by
# torchserve via config.properties; verify against that file.
# NOTE(review): no USER directive follows, so the container entrypoint still
# starts as root (common for AWS DLC/SageMaker images) — confirm intended.
RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp /opt/ml/model \
&& chown -R model-server /home/model-server /opt/ml/model
COPY config.properties /home/model-server

# Compliance
# Fetch and run the AWS OSS-compliance tooling (license attribution files),
# then remove the tooling itself in the same layer.
RUN HOME_DIR=/root \
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
&& chmod +x /usr/local/bin/testOSSCompliance \
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \
&& rm -rf ${HOME_DIR}/oss_compliance* \
# conda leaves an empty /root/.cache/conda/notices.cache file which is not removed by conda clean -ya
&& rm -rf ${HOME_DIR}/.cache/conda

# Setting up APT and PIP repo for neuron artifacts
ARG NEURON_APT_REPO=apt.repos.neuron.amazonaws.com
# Optional basic-auth credential injected into the repo URL below.
# NOTE(review): BuildKit flags SecretsUsedInArgOrEnv here — build-args are
# visible in `docker history`. If this key is a real credential, move it to a
# RUN --mount=type=secret (requires changing the build invocation to pass
# --secret instead of --build-arg).
ARG NEURON_APT_REPO_KEY

Check warning on line 149 in vllm/inference/0.13.0/Dockerfile.neuronx

View workflow job for this annotation

GitHub Actions / build (0.13.0, vllm/inference/0.13.0)

Sensitive data should not be used in the ARG or ENV commands

SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ARG "NEURON_APT_REPO_KEY") More info: https://docs.docker.com/go/dockerfile/rule/secrets-used-in-arg-or-env/
ARG NEURON_PIP_REPO=pip.repos.neuron.amazonaws.com
# Optional basic-auth credential for the pip index URL.
# NOTE(review): BuildKit flags SecretsUsedInArgOrEnv here — same concern as
# NEURON_APT_REPO_KEY above; prefer RUN --mount=type=secret for real
# credentials.
ARG NEURON_PIP_REPO_KEY

Check warning on line 151 in vllm/inference/0.13.0/Dockerfile.neuronx

View workflow job for this annotation

GitHub Actions / build (0.13.0, vllm/inference/0.13.0)

Sensitive data should not be used in the ARG or ENV commands

SecretsUsedInArgOrEnv: Do not use ARG or ENV instructions for sensitive data (ARG "NEURON_PIP_REPO_KEY") More info: https://docs.docker.com/go/dockerfile/rule/secrets-used-in-arg-or-env/
# Register the Neuron apt repo with its GPG key under /etc/apt/keyrings.
# When NEURON_APT_REPO_KEY is set it is prepended as userinfo in the repo URL
# and passed to curl via -u; otherwise both expansions are empty.
# NOTE(review): the suite is "jammy" (22.04) while the base image is ubuntu
# 24.04 — presumably the Neuron repo only publishes a jammy dist; confirm.
RUN mkdir -p /etc/apt/keyrings \
&& APT_REPO_PREFIX=$([ -n "${NEURON_APT_REPO_KEY}" ] && echo "${NEURON_APT_REPO_KEY}@" || echo "") \
&& echo "deb [signed-by=/etc/apt/keyrings/neuron.gpg] https://${APT_REPO_PREFIX}${NEURON_APT_REPO} jammy main" > /etc/apt/sources.list.d/neuron.list \
&& curl $([ -n "${NEURON_APT_REPO_KEY}" ] && echo "-u ${NEURON_APT_REPO_KEY}") --retry 3 --retry-delay 1 --retry-all-errors -fSL "https://${NEURON_APT_REPO}/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB" | gpg --dearmor > /etc/apt/keyrings/neuron.gpg

# Neuron SDK components version numbers
# These pins are consumed only by the "prod" stage below; the "repo" stage
# deliberately installs the latest published versions instead.
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.29.41.0-681fef5f5
ARG NEURONX_RUNTIME_LIB_VERSION=2.29.40.0-f954cd7a5
ARG NEURONX_TOOLS_VERSION=2.27.33.0-5d9c0b901

ARG NEURONX_CC_VERSION=2.22.12471.0+b4a00d10
ARG NEURONX_FRAMEWORK_VERSION=2.9.0.2.11.19912+e48cd891
ARG NEURONX_DISTRIBUTED_VERSION=0.16.25997+f431c02e
ARG NEURONX_DISTRIBUTED_INFERENCE_VERSION=0.7.15063+bafa28d5
ARG NKI_VERSION=0.1.0+g432b459e

# vLLM branch names
# Private branch is used by the vllm-clone stage, public branch by prod.
ARG VLLM_PRIVATE_BRANCH=release-0.3.0
ARG VLLM_PUBLIC_BRANCH=release-0.3.0

# Stage that clones the private vLLM fork; only its /vllm tree is copied into
# the "repo" stage, so the ssh configuration never reaches the final image.
FROM base AS vllm-clone

# NOTE(review): "StrictHostKeyChecking no" accepts any host key, which makes
# the ssh-keyscan pinning below redundant — consider dropping one of the two.
RUN mkdir -p /root/.ssh && \
echo "StrictHostKeyChecking no" >> /root/.ssh/config && \
ssh-keyscan -t rsa github.com >> /root/.ssh/known_hosts

WORKDIR /vllm

# Deploy key supplied via a BuildKit secret mount so it never lands in a
# layer (build with: --secret id=ssh_key,src=<path>).
RUN --mount=type=secret,id=ssh_key,target=/root/.ssh/id_ed25519,mode=0600 \
git clone -b ${VLLM_PRIVATE_BRANCH} git@github.com:aws-neuron/private-vllm-neuron.git .

# "repo" build stage: latest Neuron components plus the private vLLM fork
# installed editable from source. Selected via BUILD_STAGE=repo.
FROM base AS repo


# Install Neuron components from the apt and pip repos (latest versions)
RUN apt-get update \
&& apt-get install -y \
aws-neuronx-tools \
aws-neuronx-collectives \
aws-neuronx-runtime-lib \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/tmp* \
&& apt-get clean

# Install VLLM from source
COPY --from=vllm-clone /vllm /opt/vllm

# Neuron pip index is primary (optionally with basic-auth userinfo); PyPI is
# the fallback via --extra-index-url.
RUN PIP_REPO_URL=$([ -n "${NEURON_PIP_REPO_KEY}" ] && echo "https://${NEURON_PIP_REPO_KEY}@${NEURON_PIP_REPO}" || echo "https://${NEURON_PIP_REPO}") \
&& ${PIP} install --no-cache-dir \
--index-url ${PIP_REPO_URL} \
--trusted-host ${NEURON_PIP_REPO} \
--extra-index-url ${PYPI_SIMPLE_URL} \
"neuronx-cc>=2.0" \
"torch-neuronx==2.9.*" \
neuronx_distributed \
neuronx_distributed_inference \
nki \
-e /opt/vllm \
&& rm -rf ~/.cache/pip/*

# "prod" build stage (default BUILD_STAGE): Neuron components pinned to the
# ARG versions above, with the public vLLM fork installed from source.
FROM base AS prod

# Install Neuron components with specific versions
RUN apt-get update \
&& apt-get install -y \
aws-neuronx-tools=$NEURONX_TOOLS_VERSION \
aws-neuronx-collectives=$NEURONX_COLLECTIVES_LIB_VERSION \
aws-neuronx-runtime-lib=$NEURONX_RUNTIME_LIB_VERSION \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf /tmp/tmp* \
&& apt-get clean

# Clone VLLM source before pip installations
RUN git clone -b "${VLLM_PUBLIC_BRANCH}" https://github.com/vllm-project/vllm-neuron.git /opt/vllm

# Same index layout as the repo stage, but every Neuron package pinned.
RUN PIP_REPO_URL=$([ -n "${NEURON_PIP_REPO_KEY}" ] && echo "https://${NEURON_PIP_REPO_KEY}@${NEURON_PIP_REPO}" || echo "https://${NEURON_PIP_REPO}") \
&& ${PIP} install --no-cache-dir \
--index-url ${PIP_REPO_URL} \
--trusted-host ${NEURON_PIP_REPO} \
--extra-index-url ${PYPI_SIMPLE_URL} \
neuronx-cc==$NEURONX_CC_VERSION \
torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION \
neuronx_distributed_inference==$NEURONX_DISTRIBUTED_INFERENCE_VERSION \
nki==$NKI_VERSION \
-e /opt/vllm \
&& rm -rf ~/.cache/pip/*

# Final image is whichever stage BUILD_STAGE selects (prod by default).
FROM ${BUILD_STAGE} AS final

# 8080 = inference API, 8081 = management API (torchserve convention).
EXPOSE 8080 8081

# Exec-form entrypoint; CMD supplies the default argument and can be
# overridden at `docker run`.
# NOTE(review): "/bin/bash" is passed as an argument to vllm_entrypoint.py —
# presumably the script execs its argv; confirm against vllm_entrypoint.py.
ENTRYPOINT ["python", "/usr/local/bin/vllm_entrypoint.py"]
CMD ["/bin/bash"]
# Model servers can take a long time loading weights; without --start-period
# the default (0s) counts warm-up failures toward unhealthy immediately.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD curl --fail http://localhost:8080/ping || exit 1
Loading