Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 32 additions & 28 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,48 +1,52 @@
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
ARG PYTHON_VERSION=3.10
ARG MAMBA_VERSION=24.7.1-0
ARG TARGETPLATFORM
ENV PATH=/opt/conda/bin:$PATH \
CONDA_PREFIX=/opt/conda

ENV PATH=/root/.local/bin:/root/.cargo/bin:$PATH \
UV_SYSTEM_PYTHON=1 \
UV_COMPILE_BYTECODE=0 \
UV_HTTP_TIMEOUT=300

# Install system dependencies
RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
libssl-dev \
curl \
g++ \
make \
git && \
git \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-venv \
python3-pip \
libnuma-dev && \
rm -rf /var/lib/apt/lists/*

RUN case ${TARGETPLATFORM} in \
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
*) MAMBA_ARCH=x86_64 ;; \
esac && \
curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
bash ~/mambaforge.sh -b -p /opt/conda && \
rm ~/mambaforge.sh

RUN case ${TARGETPLATFORM} in \
"linux/arm64") exit 1 ;; \
*) /opt/conda/bin/conda update -y conda && \
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
esac && \
/opt/conda/bin/conda clean -ya
# Set python alias
RUN update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1

# Install the uv package manager.
# The installer is written to a file and then executed instead of being piped
# straight into sh: no SHELL instruction enabling pipefail is visible here, so
# with `curl ... | sh` a failed download would hand sh an empty script and the
# layer would succeed without uv being installed.
# NOTE(review): the installer URL is unversioned, so the uv release can differ
# between builds; consider pinning a specific version.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh

WORKDIR /root
WORKDIR /lightllm

COPY ./requirements.txt /lightllm/requirements.txt
RUN pip install -U pip
RUN pip install -r /lightllm/requirements.txt --no-cache-dir
# Copy project files for dependency installation
COPY pyproject.toml README.md ./
COPY lightllm/__init__.py lightllm/

RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
# Install dependencies using uv (faster than pip).
# Only pyproject.toml, README.md and lightllm/__init__.py have been copied at
# this point, so this layer stays cached until one of those files changes.
# --no-cache-dir is deliberately not passed: it makes uv bypass its cache
# entirely, which would leave the BuildKit cache mount below unused. The mount
# itself is not part of the image, so dropping the flag does not grow the layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -e .

# TODO: offline compile
# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
# vllm is installed for the additional kernels it ships.
# Pre-release builds are allowed (--pre) and the nightly wheel index is added
# as an extra index; unsafe-best-match tells uv to compare candidates across
# all configured indexes rather than stopping at the first one that has vllm.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install \
        --system \
        --pre \
        --index-strategy unsafe-best-match \
        --extra-index-url https://wheels.vllm.ai/nightly \
        vllm

RUN apt-get update && apt-get install -y libnuma-dev # for sgl_kernel
# Bring the complete build context into the working directory, then register
# the project as an editable install. --no-deps skips dependency installation
# in this step, so a source-only change does not reinstall them.
COPY . ./
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system --no-deps -e .

COPY . /lightllm
RUN pip install -e /lightllm --no-cache-dir
WORKDIR /root
121 changes: 70 additions & 51 deletions docker/Dockerfile.deepep
Original file line number Diff line number Diff line change
@@ -1,84 +1,103 @@
ARG CUDA_VERSION=12.6.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04
ARG PYTHON_VERSION=3.10
ARG MAMBA_VERSION=24.7.1-0
ARG TARGETPLATFORM
ENV PATH=/opt/conda/bin:$PATH \
CONDA_PREFIX=/opt/conda
ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58

ENV PATH=/root/.local/bin:/root/.cargo/bin:$PATH \
UV_SYSTEM_PYTHON=1 \
UV_COMPILE_BYTECODE=0 \
UV_HTTP_TIMEOUT=300 \
CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/

# Install system dependencies
RUN chmod 777 -R /tmp && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ca-certificates \
libssl-dev \
curl \
g++ \
make \
git && \
git \
python${PYTHON_VERSION} \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-venv \
python3-pip \
libnuma-dev \
wget \
devscripts \
debhelper \
dh-make \
build-essential \
dkms \
ibverbs-providers \
infiniband-diags \
perftest \
rdma-core \
libibverbs-dev \
librdmacm-dev \
cmake && \
rm -rf /var/lib/apt/lists/*

RUN case ${TARGETPLATFORM} in \
"linux/arm64") MAMBA_ARCH=aarch64 ;; \
*) MAMBA_ARCH=x86_64 ;; \
esac && \
curl -fsSL -o ~/mambaforge.sh "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh" && \
bash ~/mambaforge.sh -b -p /opt/conda && \
rm ~/mambaforge.sh

RUN case ${TARGETPLATFORM} in \
"linux/arm64") exit 1 ;; \
*) /opt/conda/bin/conda update -y conda && \
/opt/conda/bin/conda install -y "python=${PYTHON_VERSION}" ;; \
esac && \
/opt/conda/bin/conda clean -ya


WORKDIR /root
# Set python alias
RUN update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1

COPY ./requirements.txt /lightllm/requirements.txt
RUN pip install -U pip
RUN pip install -r /lightllm/requirements.txt --no-cache-dir
# Install the uv package manager.
# The installer is written to a file and then executed instead of being piped
# straight into sh: no SHELL instruction enabling pipefail is visible here, so
# with `curl ... | sh` a failed download would hand sh an empty script and the
# layer would succeed without uv being installed.
# NOTE(review): the installer URL is unversioned, so the uv release can differ
# between builds; consider pinning a specific version.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
    sh /tmp/uv-install.sh && \
    rm -f /tmp/uv-install.sh

RUN pip install --no-cache-dir vllm --pre --extra-index-url https://wheels.vllm.ai/nightly
WORKDIR /lightllm

# TODO: offline compile
# RUN git clone https://github.com/ModelTC/LightKernel.git && cd LightKernel && pip install --no-deps -v .
# Copy project files for dependency installation
COPY pyproject.toml README.md ./
COPY lightllm/__init__.py lightllm/

RUN apt-get update && apt-get install -y libnuma-dev wget devscripts debhelper dh-make build-essential dkms
RUN apt-get install -y ibverbs-providers infiniband-diags perftest rdma-core libibverbs-dev librdmacm-dev
# Install dependencies using uv (faster than pip).
# Only pyproject.toml, README.md and lightllm/__init__.py have been copied at
# this point, so this layer stays cached until one of those files changes.
# --no-cache-dir is deliberately not passed: it makes uv bypass its cache
# entirely, which would leave the BuildKit cache mount below unused. The mount
# itself is not part of the image, so dropping the flag does not grow the layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install --system -e .

ENV CUDA_HOME=/usr/local/cuda \
GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
# vllm is installed for the additional kernels it ships.
# Pre-release builds are allowed (--pre) and the nightly wheel index is added
# as an extra index; unsafe-best-match tells uv to compare candidates across
# all configured indexes rather than stopping at the first one that has vllm.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install \
        --system \
        --pre \
        --index-strategy unsafe-best-match \
        --extra-index-url https://wheels.vllm.ai/nightly \
        vllm

RUN mkdir -p /tmp/gdrcopy && cd /tmp \
&& git clone https://github.com/NVIDIA/gdrcopy.git -b v2.4.4 \
&& cd gdrcopy/packages \
&& CUDA=/usr/local/cuda ./build-deb-packages.sh \
&& dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
&& cd / && rm -rf /tmp/gdrcopy
# Build the gdrcopy v2.4.4 Debian packages from source and install them with
# dpkg. The checkout under /tmp is removed in the same RUN, so the source tree
# and the generated .deb files do not persist in the resulting layer.
RUN mkdir -p /tmp/gdrcopy \
    && git clone --branch v2.4.4 https://github.com/NVIDIA/gdrcopy.git /tmp/gdrcopy \
    && cd /tmp/gdrcopy/packages \
    && CUDA=/usr/local/cuda ./build-deb-packages.sh \
    && dpkg -i gdrdrv-dkms_*.deb libgdrapi_*.deb gdrcopy-tests_*.deb gdrcopy_*.deb \
    && cd / \
    && rm -rf /tmp/gdrcopy

# Fix DeepEP IBGDA symlink
# Fix DeepEP IBGDA symlink: expose libmlx5.so.1 under the unversioned name
# libmlx5.so. The multiarch library directory is derived from `uname -m`
# rather than hard-coded, so it resolves to the same x86_64-linux-gnu path on
# x86_64 and to aarch64-linux-gnu when the image is built for arm64.
RUN libdir="/usr/lib/$(uname -m)-linux-gnu" && \
    ln -sf "${libdir}/libmlx5.so.1" "${libdir}/libmlx5.so"

# NOTE(review): this span shows both sides of the diff without +/- markers: two
# variants of the NVSHMEM 3.3.9 build (leading-&& and trailing-&& forms) with
# the earlier DeepEP clone step in between.
# Both variants download the tarball into the current working directory but
# delete it through an absolute /root/ path, so the archive is only removed
# when the step runs with WORKDIR /root. Under WORKDIR /lightllm (set earlier
# in this file) the `rm -f` is a silent no-op and the tarball stays in the
# layer -- confirm and align the two paths.
# The unpacked ./nvshmem source and build tree is also left in place; only the
# install prefix /root/nvshmem/install is referenced afterwards (NVSHMEM_DIR).
# CMAKE_CUDA_ARCHITECTURES=90 restricts the build to a single CUDA architecture.
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
&& tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && mv nvshmem_src nvshmem \
&& cd nvshmem \
&& rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz \
&& NVSHMEM_SHMEM_SUPPORT=0 \
# Build and install NVSHMEM
RUN wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
mv nvshmem_src nvshmem && \
cd nvshmem && \
rm -f /root/nvshmem_src_cuda12-all-all-3.3.9.tar.gz && \
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_USE_GDRCOPY=1 \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 \
&& cmake --build build --target install -j64

ARG DEEPEP_COMMIT=b6ce310bb0b75079682d09bc2ebc063a074fbd58
RUN git clone https://github.com/deepseek-ai/DeepEP.git && cd DeepEP && git checkout ${DEEPEP_COMMIT} && cd ..
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=/root/nvshmem/install -DCMAKE_CUDA_ARCHITECTURES=90 && \
cmake --build build --target install -j$(nproc)

WORKDIR /root/DeepEP
# Build and install DeepEP
ENV NVSHMEM_DIR=/root/nvshmem/install
RUN NVSHMEM_DIR=/root/nvshmem/install python setup.py install
RUN git clone https://github.com/deepseek-ai/DeepEP.git /root/DeepEP && \
cd /root/DeepEP && \
git checkout ${DEEPEP_COMMIT} && \
NVSHMEM_DIR=/root/nvshmem/install python setup.py install

# Copy full source code and install in editable mode
COPY . /lightllm
RUN pip install -e /lightllm --no-cache-dir
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system -e /lightllm --no-deps

WORKDIR /root
Loading