File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 6464 docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}"
6565 docker build -t "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu" --progress=plain -f xinference/deploy/docker/Dockerfile.cpu .
6666 docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}-cpu"
67+ docker buildx build --platform linux/arm64 --load -t "$DOCKER_ORG/xinference:${IMAGE_TAG}-aarch64" --progress=plain -f xinference/deploy/docker/Dockerfile.aarch64 . # --load keeps the image in the local store so the docker push / docker tag steps can find it
68+ docker push "$DOCKER_ORG/xinference:${IMAGE_TAG}-aarch64"
6769 echo "XINFERENCE_IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV
6870 done
6971
7274 docker push "$DOCKER_ORG/xinference:latest"
7375 docker tag "$DOCKER_ORG/xinference:${GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
7476 docker push "$DOCKER_ORG/xinference:latest-cpu"
77+ docker tag "$DOCKER_ORG/xinference:${GIT_TAG}-aarch64" "$DOCKER_ORG/xinference:latest-aarch64"
78+ docker push "$DOCKER_ORG/xinference:latest-aarch64"
7579 echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
7680 fi
7781
Original file line number Diff line number Diff line change 1- FROM vllm/vllm-openai:v0.13.0
1+ FROM vllm/vllm-openai:v0.17.1
22
33COPY . /opt/inference
44WORKDIR /opt/inference
@@ -49,9 +49,9 @@ RUN pip install --upgrade -i "$PIP_INDEX" pip "setuptools<82" wheel && \
4949 pip install -i "$PIP_INDEX" wetext && \
5050 pip uninstall flashinfer -y && \
5151 pip install -i "$PIP_INDEX" flashinfer-python==0.5.3 flashinfer-cubin==0.5.3 && \
52- pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.5.3/flashinfer_jit_cache-0.5.3+cu129-cp39-abi3-manylinux_2_28_x86_64.whl && \
5352 pip install -i "$PIP_INDEX" SQLAlchemy==1.4.54 && \
54- pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.5.4/flash_attn-2.8.3+cu128torch2.9-cp312-cp312-linux_x86_64.whl && \
53+ pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.5.3/flashinfer_jit_cache-0.5.3+cu129-cp39-abi3-manylinux_2_28_x86_64.whl && \
54+ pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.7.12/flash_attn-2.8.3+cu128torch2.10-cp312-cp312-linux_x86_64.whl && \
5555 cd /opt/inference && \
5656 python3 setup.py build_web && \
5757 git restore . && \
Original file line number Diff line number Diff line change 1+ FROM vllm/vllm-openai:v0.17.1-aarch64
2+
3+ COPY . /opt/inference
4+
5+ WORKDIR /opt/inference
6+
7+ ARG PIP_INDEX=https://pypi.org/simple
8+ # Install OS build packages and Python requirements (all index-backed pip installs go through $PIP_INDEX), build the web UI, install xinference, and drop apt lists / pip cache in the same layer.
9+ RUN apt-get update -y && \
10+ pip install --upgrade -i "$PIP_INDEX" pip setuptools==79.0.1 wheel && \
11+ apt-get install -y wget libpcre3 libpcre3-dev gcc rsync zlib1g zlib1g-dev git libssl-dev && \
12+ apt-get clean all && rm -rf /var/lib/apt/lists/* && \
13+ cd /opt/inference/ && \
14+ pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements_aarch64/requirements_aarch64.txt && \
15+ pip install --no-deps --no-build-isolation "." && \
16+ cd /opt/inference && \
17+ python3 setup.py build_web && \
18+ git restore . && \
19+ pip install -i "$PIP_INDEX" --no-deps "." && \
20+ pip install -i "$PIP_INDEX" "xllamacpp>=0.2.0" && \
21+ pip cache purge
22+ # Install pre-release torch/torchvision/torchaudio from the cu128 nightly index without dependency resolution, then triton and torchcodec; purge the pip cache in the same layer.
23+ RUN pip3 install --pre --no-deps \
24+ --index-url https://download.pytorch.org/whl/nightly/cu128 torch torchvision torchaudio && \
25+ pip install triton && \
26+ pip install torchcodec && \
27+ pip cache purge
28+
29+ ENTRYPOINT []
Original file line number Diff line number Diff line change @@ -52,4 +52,4 @@ xgrammar>=0.1.10
5252cuda-python
5353sgl-kernel>=0.0.3.post3,<=0.1.4
5454IPython
55- numpy==1.26.4
55+ numpy==2.2.6
Original file line number Diff line number Diff line change 1+ xoscar>=0.7.1
2+ gradio==4.26.0
3+ typer[all]<0.12.0 # fix typer required by gradio
4+ pillow
5+ click
6+ tqdm>=4.27
7+ tabulate
8+ requests
9+ pydantic
10+ fastapi==0.110.3
11+ uvicorn
12+ huggingface-hub>=0.19.4
13+ typing_extensions
14+ modelscope>=1.10.0
15+ sse_starlette>=1.6.5 # ensure_bytes API break change: https://github.com/sysid/sse-starlette/issues/65
16+ openai>=1.40.0 # For typing
17+ python-jose[cryptography]
18+ bcrypt>=4.0.0
19+ aioprometheus[starlette]>=23.12.0
20+ pynvml
21+ async-timeout
22+ peft>=0.17.0
23+ opencv-contrib-python
24+ sqlalchemy==1.4.54
25+ fastapi_pagination
26+ python-dotenv
27+ langfuse==3.3.0
28+ piexif
29+ aiohttp
30+ prometheus_client
31+ psycopg2-binary
32+ numpy==2.2.6
You can’t perform that action at this time.
0 commit comments