Skip to content

Commit ba7167f

Browse files
authored
feat: Upgrade the base image to version 0.17.1 and add support for aarch64 version images (#4726)
1 parent 003c6cd commit ba7167f

5 files changed

Lines changed: 69 additions & 4 deletions

File tree

.github/workflows/docker-cd.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ jobs:
6464
docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}"
6565
docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu" --progress=plain -f xinference/deploy/docker/Dockerfile.cpu .
6666
docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu"
67+
docker buildx build --platform linux/arm64 --push -t "$DOCKER_ORG/xinference:${IMAGE_TAG}-aarch64" --progress=plain -f xinference/deploy/docker/Dockerfile.aarch64 .
68+
docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}-aarch64"
6769
echo "XINFERENCE_IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV
6870
done
6971
@@ -72,6 +74,8 @@ jobs:
7274
docker push "$DOCKER_ORG/xinference:latest"
7375
docker tag "$DOCKER_ORG/xinference:${GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
7476
docker push "$DOCKER_ORG/xinference:latest-cpu"
77+
docker tag "$DOCKER_ORG/xinference:${GIT_TAG}-aarch64" "$DOCKER_ORG/xinference:latest-aarch64"
78+
docker push "$DOCKER_ORG/xinference:latest-aarch64"
7579
echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
7680
fi
7781

xinference/deploy/docker/Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FROM vllm/vllm-openai:v0.13.0
1+
FROM vllm/vllm-openai:v0.17.1
22

33
COPY . /opt/inference
44
WORKDIR /opt/inference
@@ -49,9 +49,9 @@ RUN pip install --upgrade -i "$PIP_INDEX" pip "setuptools<82" wheel && \
4949
pip install -i "$PIP_INDEX" wetext && \
5050
pip uninstall flashinfer -y && \
5151
pip install -i "$PIP_INDEX" flashinfer-python==0.5.3 flashinfer-cubin==0.5.3 && \
52-
pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.5.3/flashinfer_jit_cache-0.5.3+cu129-cp39-abi3-manylinux_2_28_x86_64.whl && \
5352
pip install -i "$PIP_INDEX" SQLAlchemy==1.4.54 && \
54-
pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.5.4/flash_attn-2.8.3+cu128torch2.9-cp312-cp312-linux_x86_64.whl && \
53+
pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.5.3/flashinfer_jit_cache-0.5.3+cu129-cp39-abi3-manylinux_2_28_x86_64.whl && \
54+
pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.7.12/flash_attn-2.8.3+cu128torch2.10-cp312-cp312-linux_x86_64.whl && \
5555
cd /opt/inference && \
5656
python3 setup.py build_web && \
5757
git restore . && \
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
FROM vllm/vllm-openai:v0.17.1-aarch64

COPY . /opt/inference
WORKDIR /opt/inference

# PyPI index override for mirrored/internal builds; defaults to the public index.
ARG PIP_INDEX=https://pypi.org/simple

# Install OS build dependencies, Python requirements, and Xinference itself
# (including its web UI, built from source) in a single cached layer.
RUN apt-get update -y && \
    pip install -i "$PIP_INDEX" --upgrade pip setuptools==79.0.1 wheel && \
    apt-get install -y --no-install-recommends \
        gcc \
        git \
        libpcre3 \
        libpcre3-dev \
        libssl-dev \
        rsync \
        wget \
        zlib1g \
        zlib1g-dev && \
    # Clean apt metadata in the same layer so it never persists in the image.
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed \
        -r /opt/inference/xinference/deploy/docker/requirements_aarch64/requirements_aarch64.txt && \
    # First pass: install the package without re-resolving dependencies
    # (requirements above already pin everything).
    pip install --no-deps --no-build-isolation "." && \
    # Build the bundled web UI, then drop the build's working-tree changes.
    python3 setup.py build_web && \
    git restore . && \
    # Second pass: reinstall so the freshly built web assets are packaged.
    pip install -i "$PIP_INDEX" --no-deps "." && \
    pip install -i "$PIP_INDEX" "xllamacpp>=0.2.0" && \
    pip cache purge

# torch/torchvision/torchaudio for aarch64 + cu128 come from the PyTorch nightly index.
# NOTE(review): --pre nightly wheels are unpinned, so this layer is not reproducible —
# pin to a dated nightly build once a known-good version is identified.
RUN pip3 install --pre torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/nightly/cu128 --no-deps && \
    pip install -i "$PIP_INDEX" triton && \
    pip install -i "$PIP_INDEX" torchcodec && \
    pip cache purge

# Clear the vLLM base image's entrypoint so xinference commands run directly.
ENTRYPOINT []

xinference/deploy/docker/requirements/requirements-ml.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,4 @@ xgrammar>=0.1.10
5252
cuda-python
5353
sgl-kernel>=0.0.3.post3,<=0.1.4
5454
IPython
55-
numpy==1.26.4
55+
numpy==2.2.6
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
xoscar>=0.7.1
2+
gradio==4.26.0
3+
typer[all]<0.12.0 # fix typer required by gradio
4+
pillow
5+
click
6+
tqdm>=4.27
7+
tabulate
8+
requests
9+
pydantic
10+
fastapi==0.110.3
11+
uvicorn
12+
huggingface-hub>=0.19.4
13+
typing_extensions
14+
modelscope>=1.10.0
15+
sse_starlette>=1.6.5 # ensure_bytes API break change: https://github.com/sysid/sse-starlette/issues/65
16+
openai>=1.40.0 # For typing
17+
python-jose[cryptography]
18+
bcrypt>=4.0.0
19+
aioprometheus[starlette]>=23.12.0
20+
pynvml
21+
async-timeout
22+
peft>=0.17.0
23+
opencv-contrib-python
24+
sqlalchemy==1.4.54
25+
fastapi_pagination
26+
python-dotenv
27+
langfuse==3.3.0
28+
piexif
29+
aiohttp
30+
prometheus_client
31+
psycopg2-binary
32+
numpy==2.2.6

0 commit comments

Comments (0)