|
1 | | -FROM lukemathwalker/cargo-chef:latest-rust-1.85-bookworm AS chef |
2 | | -WORKDIR /usr/src |
| 1 | +# Dockerfile for TEI with Python backend and CUDA support |
| 2 | +# Supports: L40s (sm_89), RTX 3090 (sm_86) |
| 3 | + |
| 4 | +# ============================================================================= |
| 5 | +# Stage 1: Rust Builder |
| 6 | +# ============================================================================= |
| 7 | +FROM nvidia/cuda:12.4.0-devel-ubuntu22.04 AS rust-builder |
3 | 8 |
|
4 | 9 | ENV SCCACHE=0.10.0 |
5 | 10 | ENV RUSTC_WRAPPER=/usr/local/bin/sccache |
| 11 | +ENV PATH="/root/.cargo/bin:${PATH}" |
| 12 | +ENV CARGO_CHEF=0.1.71 |
| 13 | + |
| 14 | +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ |
| 15 | + curl \ |
| 16 | + libssl-dev \ |
| 17 | + pkg-config \ |
| 18 | + protobuf-compiler \ |
| 19 | + && rm -rf /var/lib/apt/lists/* |
6 | 20 |
|
7 | | -# Donwload, configure sccache |
8 | 21 | RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \ |
9 | 22 | chmod +x /usr/local/bin/sccache |
10 | 23 |
|
11 | | -FROM chef AS planner |
| 24 | +RUN curl https://sh.rustup.rs -sSf | bash -s -- -y |
| 25 | +RUN cargo install cargo-chef --version $CARGO_CHEF --locked |
| 26 | + |
| 27 | +# ============================================================================= |
| 28 | +# Stage 2: Recipe Planner |
| 29 | +# ============================================================================= |
| 30 | +FROM rust-builder AS planner |
| 31 | + |
| 32 | +WORKDIR /usr/src |
12 | 33 |
|
13 | 34 | COPY backends backends |
14 | 35 | COPY core core |
15 | 36 | COPY router router |
16 | 37 | COPY Cargo.toml ./ |
17 | 38 | COPY Cargo.lock ./ |
18 | 39 |
|
19 | | -RUN cargo chef prepare --recipe-path recipe.json |
| 40 | +RUN cargo chef prepare --recipe-path recipe.json |
20 | 41 |
|
21 | | -FROM chef AS builder |
| 42 | +# ============================================================================= |
| 43 | +# Stage 3: Dependency Builder |
| 44 | +# ============================================================================= |
| 45 | +FROM rust-builder AS builder |
22 | 46 |
|
23 | 47 | ARG GIT_SHA |
24 | 48 | ARG DOCKER_LABEL |
25 | 49 |
|
26 | | -# sccache specific variables |
27 | | -ARG SCCACHE_GHA_ENABLED |
28 | | - |
29 | | -RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ |
30 | | - | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \ |
31 | | - echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \ |
32 | | - tee /etc/apt/sources.list.d/oneAPI.list |
33 | | - |
34 | | -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ |
35 | | - intel-oneapi-mkl-devel=2024.0.0-49656 \ |
36 | | - build-essential \ |
37 | | - && rm -rf /var/lib/apt/lists/* |
38 | | - |
39 | | -RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \ |
40 | | - gcc -shared -fPIC -o libfakeintel.so fakeintel.c |
| 50 | +WORKDIR /usr/src |
41 | 51 |
|
42 | 52 | COPY --from=planner /usr/src/recipe.json recipe.json |
43 | 53 |
|
44 | | -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ |
45 | | - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ |
46 | | - cargo chef cook --release --features ort,candle,mkl,static-linking --no-default-features --recipe-path recipe.json && sccache -s |
| 54 | +RUN cargo chef cook --release --features python --features http --recipe-path recipe.json && sccache -s |
47 | 55 |
|
48 | 56 | COPY backends backends |
49 | 57 | COPY core core |
50 | 58 | COPY router router |
51 | 59 | COPY Cargo.toml ./ |
52 | 60 | COPY Cargo.lock ./ |
53 | 61 |
|
54 | | -FROM builder AS http-builder |
| 62 | +RUN cargo build --release --bin text-embeddings-router -F python -F http --no-default-features && sccache -s |
55 | 63 |
|
56 | | -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ |
57 | | - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ |
58 | | - cargo build --release --bin text-embeddings-router --features ort,candle,mkl,static-linking,http --no-default-features && sccache -s |
| 64 | +# ============================================================================= |
| 65 | +# Stage 4: Python Environment |
| 66 | +# ============================================================================= |
| 67 | +FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 AS python-builder |
59 | 68 |
|
60 | | -FROM builder AS grpc-builder |
| 69 | +ENV DEBIAN_FRONTEND=noninteractive |
61 | 70 |
|
62 | | -RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ |
63 | | - curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ |
64 | | - unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \ |
65 | | - unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ |
66 | | - rm -f $PROTOC_ZIP |
| 71 | +RUN apt-get update && apt-get install -y --no-install-recommends \ |
| 72 | + python3.10 \ |
| 73 | + python3.10-dev \ |
| 74 | + python3-pip \ |
| 75 | + git \ |
| 76 | + && rm -rf /var/lib/apt/lists/* |
67 | 77 |
|
68 | | -COPY proto proto |
| 78 | +RUN ln -sf /usr/bin/python3.10 /usr/bin/python && \ |
| 79 | + ln -sf /usr/bin/python3.10 /usr/bin/python3 |
69 | 80 |
|
70 | | -RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ |
71 | | - --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ |
72 | | - cargo build --release --bin text-embeddings-router --features ort,candle,mkl,static-linking,grpc --no-default-features && sccache -s |
| 81 | +RUN pip install --no-cache-dir --upgrade pip setuptools wheel |
73 | 82 |
|
74 | | -FROM debian:bookworm-slim AS base |
| 83 | +WORKDIR /opt/server |
75 | 84 |
|
76 | | -ENV HUGGINGFACE_HUB_CACHE=/data \ |
77 | | - PORT=80 \ |
78 | | - MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \ |
79 | | - RAYON_NUM_THREADS=8 \ |
80 | | - LD_PRELOAD=/usr/local/libfakeintel.so \ |
81 | | - LD_LIBRARY_PATH=/usr/local/lib |
| 85 | +COPY backends/proto /opt/proto |
| 86 | +COPY backends/python/server /opt/server |
82 | 87 |
|
83 | | -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ |
84 | | - libomp-dev \ |
85 | | - ca-certificates \ |
86 | | - libssl-dev \ |
87 | | - curl \ |
88 | | - && rm -rf /var/lib/apt/lists/* |
| 88 | +RUN pip install grpcio-tools==1.62.2 mypy-protobuf==3.6.0 'types-protobuf' --no-cache-dir && \ |
| 89 | + mkdir -p text_embeddings_server/pb && \ |
| 90 | + python -m grpc_tools.protoc -I/opt/proto --python_out=text_embeddings_server/pb \ |
| 91 | + --grpc_python_out=text_embeddings_server/pb --mypy_out=text_embeddings_server/pb /opt/proto/embed.proto && \ |
| 92 | + find text_embeddings_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \; && \ |
| 93 | + touch text_embeddings_server/pb/__init__.py |
89 | 94 |
|
90 | | -# Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch... |
91 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2 |
92 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2 |
93 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2 |
94 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2 |
95 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2 |
96 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2 |
97 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2 |
98 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2 |
99 | | -COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2 |
100 | | -COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so |
| 95 | +RUN pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu124 |
101 | 96 |
|
102 | | -FROM base AS grpc |
| 97 | +RUN pip install --no-cache-dir -r requirements.txt |
103 | 98 |
|
104 | | -COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router |
| 99 | +RUN pip install --no-cache-dir -e . |
105 | 100 |
|
106 | | -ENTRYPOINT ["text-embeddings-router"] |
107 | | -CMD ["--json-output"] |
| 101 | +# ============================================================================= |
| 102 | +# Stage 5: Final Image |
| 103 | +# ============================================================================= |
| 104 | +FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04 |
| 105 | + |
| 106 | +ENV DEBIAN_FRONTEND=noninteractive |
| 107 | +ENV HUGGINGFACE_HUB_CACHE=/data |
| 108 | +ENV PORT=80 |
| 109 | +ENV TQDM_DISABLE=1 |
| 110 | + |
| 111 | +RUN apt-get update && apt-get install -y --no-install-recommends \ |
| 112 | + python3.10 \ |
| 113 | + python3-pip \ |
| 114 | + ca-certificates \ |
| 115 | + libssl-dev \ |
| 116 | + curl \ |
| 117 | + && rm -rf /var/lib/apt/lists/* |
108 | 118 |
|
109 | | -FROM base AS http |
| 119 | +RUN ln -sf /usr/bin/python3.10 /usr/bin/python && \ |
| 120 | + ln -sf /usr/bin/python3.10 /usr/bin/python3 |
110 | 121 |
|
111 | | -COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router |
| 122 | +COPY --from=python-builder /usr/local/lib/python3.10/dist-packages /usr/local/lib/python3.10/dist-packages |
| 123 | +COPY --from=python-builder /opt/server /opt/server |
112 | 124 |
|
113 | | -# Amazon SageMaker compatible image |
114 | | -FROM http AS sagemaker |
115 | | -COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh |
| 125 | +COPY --from=builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router |
116 | 126 |
|
117 | | -ENTRYPOINT ["./entrypoint.sh"] |
| 127 | +ENV PATH="/usr/local/bin:${PATH}" |
| 128 | +ENV PYTHONPATH="/opt/server:${PYTHONPATH}" |
118 | 129 |
|
119 | | -# Default image |
120 | | -FROM http |
| 130 | +WORKDIR /opt/server |
121 | 131 |
|
122 | 132 | ENTRYPOINT ["text-embeddings-router"] |
123 | 133 | CMD ["--json-output"] |
0 commit comments