Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add containers/tei/{cpu,gpu}/1.6.0 #132

Merged
merged 6 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 123 additions & 0 deletions containers/tei/cpu/1.6.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Fetch and extract the TEI sources.
# The base image is pinned to a release tag so this stage is reproducible
# instead of tracking whatever the implicit `alpine:latest` points to.
FROM alpine:3.21 AS tei

RUN mkdir -p /tei
# NOTE(review): the tarball is downloaded without integrity verification;
# consider `ADD --checksum=sha256:<digest>` once the expected digest is recorded.
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.6.0.tar.gz /tei/sources.tar.gz
# --strip-components=1 removes the leading path component of every archive
# member, so backends/, core/, router/, ... end up directly under /tei.
RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TEI original Dockerfile)
FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
WORKDIR /usr/src

# SCCACHE is the sccache release to fetch; RUSTC_WRAPPER points rustc
# invocations at the binary installed below.
ENV SCCACHE=0.5.4 \
    RUSTC_WRAPPER=/usr/local/bin/sccache

# Download the sccache release tarball and extract only the `sccache` binary
# into /usr/local/bin, then make sure it is executable.
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz \
        | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache \
    && chmod +x /usr/local/bin/sccache

# Planner stage: produce the cargo-chef recipe (recipe.json) from the TEI
# workspace sources extracted in the `tei` stage.
FROM chef AS planner

# Workspace crates
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
# Workspace manifest and lock file, copied into the current WORKDIR
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./

RUN cargo chef prepare --recipe-path recipe.json

# Builder stage: installs Intel MKL, builds the libfakeintel.so shim, cooks the
# dependency recipe and finally brings in the full TEI sources. The
# `http-builder` and `grpc-builder` stages start from here.
FROM chef AS builder

# Register Intel's oneAPI APT repository together with its signing key.
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
        | gpg --dearmor \
        | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null \
    && echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" \
        | tee /etc/apt/sources.list.d/oneAPI.list

# MKL development package (version-pinned) plus the C toolchain.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        intel-oneapi-mkl-devel=2024.0.0-49656 \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Shared object exporting mkl_serv_intel_cpu_true(), hard-wired to return 1.
# The runtime stage copies it and lists it in LD_PRELOAD.
# NOTE(review): presumably this makes MKL treat any CPU as an Intel one - confirm.
RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c \
    && gcc -shared -fPIC -o libfakeintel.so fakeintel.c

COPY --from=planner /usr/src/recipe.json recipe.json

# Build the dependencies described by the recipe, then print sccache statistics.
RUN cargo chef cook --release \
        --features ort \
        --features candle \
        --features mkl-dynamic \
        --features google \
        --no-default-features \
        --recipe-path recipe.json \
    && sccache -s

# Full workspace sources, added only after the dependency layer above.
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./

# Builds the router binary with the `http` feature enabled.
FROM builder AS http-builder

RUN cargo build --release --bin text-embeddings-router \
        -F ort \
        -F candle \
        -F mkl-dynamic \
        -F http \
        -F google \
        --no-default-features \
    && sccache -s

# Builds the router binary with the `grpc` feature enabled.
FROM builder AS grpc-builder

# Fetch the protoc 21.12 release archive and unpack only the compiler binary
# and the bundled include files under /usr/local, then delete the archive.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip \
    && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP \
    && unzip -o $PROTOC_ZIP -d /usr/local bin/protoc \
    && unzip -o $PROTOC_ZIP -d /usr/local 'include/*' \
    && rm -f $PROTOC_ZIP

# Protocol definitions from the TEI sources.
COPY --from=tei /tei/proto proto

RUN cargo build --release --bin text-embeddings-router \
        -F grpc \
        -F ort \
        -F candle \
        -F mkl-dynamic \
        -F google \
        --no-default-features \
    && sccache -s

# Runtime base image shared by the `grpc` and `http` targets.
FROM debian:bookworm-slim AS base

# LD_PRELOAD points at the libfakeintel.so shim copied below, and
# LD_LIBRARY_PATH at the directory that receives the MKL shared objects.
ENV HUGGINGFACE_HUB_CACHE=/tmp \
    PORT=8080 \
    MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
    RAYON_NUM_THREADS=4 \
    LD_PRELOAD=/usr/local/libfakeintel.so \
    LD_LIBRARY_PATH=/usr/local/lib

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        libomp-dev \
        ca-certificates \
        libssl-dev \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch...
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2
COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so

# Install the Google Cloud CLI in a single layer.
# The repository key is de-armored with gpg into the keyring referenced by
# `signed-by` (apt-key is deprecated), curl fails on HTTP errors (-f), and the
# APT lists are removed in the same layer so they do not bloat the image.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        gnupg \
        curl \
    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
    && apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk \
    && rm -rf /var/lib/apt/lists/*

# COPY custom entrypoint for Google
# The path is relative to the build context, and the destination to the
# image's working directory.
COPY --chmod=775 containers/tei/cpu/1.6.0/entrypoint.sh entrypoint.sh

# Final image shipping the router binary produced by `grpc-builder`.
FROM base AS grpc

COPY --from=grpc-builder \
    /usr/src/target/release/text-embeddings-router \
    /usr/local/bin/text-embeddings-router

# CMD supplies the default arguments handed to the entrypoint script.
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]

# Final image shipping the router binary produced by `http-builder`.
FROM base AS http

COPY --from=http-builder \
    /usr/src/target/release/text-embeddings-router \
    /usr/local/bin/text-embeddings-router

# CMD supplies the default arguments handed to the entrypoint script.
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
30 changes: 30 additions & 0 deletions containers/tei/cpu/1.6.0/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Entrypoint for the TEI CPU container.
#
# When AIP_STORAGE_URI is a gs:// URI, its contents are first copied to
# /tmp/model with `gcloud storage cp` and MODEL_ID is exported to point at that
# directory; the script exits with status 1 if the copy fails. All script
# arguments are then forwarded unchanged to text-embeddings-router.

# Check if AIP_STORAGE_URI starts with "gs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/tmp/model"
    mkdir -p "$TARGET_DIR"

    # Use `gcloud storage cp` to copy the content from GCS to the target directory.
    # The wildcard is quoted so gcloud, not the shell, expands it.
    echo "Running: gcloud storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
    if gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update MODEL_ID to point to the local directory
        echo "Updating MODEL_ID to point to the local directory."
        export MODEL_ID="$TARGET_DIR"
    else
        echo "Failed to download model from GCS."
        exit 1
    fi
fi

# Best effort: a failure to refresh the linker cache is reported but not fatal.
ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# "$@" must be quoted so that arguments containing whitespace or glob
# characters reach the router exactly as they were passed to this script.
exec text-embeddings-router "$@"
113 changes: 113 additions & 0 deletions containers/tei/gpu/1.6.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Fetch and extract the TEI sources.
# The base image is pinned to a release tag so this stage is reproducible
# instead of tracking whatever the implicit `alpine:latest` points to.
FROM alpine:3.21 AS tei

RUN mkdir -p /tei
# NOTE(review): the tarball is downloaded without integrity verification;
# consider `ADD --checksum=sha256:<digest>` once the expected digest is recorded.
ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.6.0.tar.gz /tei/sources.tar.gz
# --strip-components=1 removes the leading path component of every archive
# member, so backends/, core/, router/, ... end up directly under /tei.
RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TEI original Dockerfile)
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder

# SCCACHE is the sccache release to fetch, RUSTC_WRAPPER points rustc
# invocations at it, and PATH is extended with /root/.cargo/bin so that
# `cargo` resolves after the rustup install below.
ENV SCCACHE=0.5.4 \
    RUSTC_WRAPPER=/usr/local/bin/sccache \
    PATH="/root/.cargo/bin:${PATH}"

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Download and configure sccache: extract only the `sccache` binary from the
# release tarball into /usr/local/bin and make sure it is executable.
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz \
        | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache \
    && chmod +x /usr/local/bin/sccache

# Rust toolchain via rustup (non-interactive), then cargo-chef.
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
RUN cargo install cargo-chef --locked

# Planner stage: produce the cargo-chef recipe (recipe.json) from the TEI
# workspace sources extracted in the `tei` stage.
FROM base-builder AS planner

WORKDIR /usr/src

# Workspace crates
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
# Workspace manifest and lock file, copied into the current WORKDIR
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./

RUN cargo chef prepare --recipe-path recipe.json

# Common builder stage from which the per-compute-capability builders
# (builder-75, builder-80, builder-90) are derived.
FROM base-builder AS builder

WORKDIR /usr/src

COPY --from=planner /usr/src/recipe.json recipe.json

# Cook the recipe with only the `google` feature, then print sccache statistics.
RUN cargo chef cook --release \
        --features google \
        --recipe-path recipe.json \
    && sccache -s

# Router build for CUDA compute capability 7.5 (`candle-cuda-turing` feature).
FROM builder AS builder-75

# Dependencies first, so this layer is reused when only the sources change.
RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release \
        --features google \
        --features candle-cuda-turing \
        --recipe-path recipe.json \
    && sccache -s

# Full workspace sources.
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./

RUN CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router \
        -F candle-cuda-turing \
        -F google \
    && sccache -s

# Router build for CUDA compute capability 8.0 (`candle-cuda` feature).
FROM builder AS builder-80

# Dependencies first, so this layer is reused when only the sources change.
RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release \
        --features google \
        --features candle-cuda \
        --recipe-path recipe.json \
    && sccache -s

# Full workspace sources.
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./

RUN CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router \
        -F candle-cuda \
        -F google \
    && sccache -s

# Router build for CUDA compute capability 9.0 (`candle-cuda` feature).
FROM builder AS builder-90

# Dependencies first, so this layer is reused when only the sources change.
RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release \
        --features google \
        --features candle-cuda \
        --recipe-path recipe.json \
    && sccache -s

# Full workspace sources.
COPY --from=tei /tei/backends backends
COPY --from=tei /tei/core core
COPY --from=tei /tei/router router
COPY --from=tei /tei/Cargo.toml /tei/Cargo.lock ./

RUN CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router \
        -F candle-cuda \
        -F google \
    && sccache -s

# Runtime image: CUDA runtime plus one router binary per supported compute
# capability.
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base

ENV HUGGINGFACE_HUB_CACHE=/tmp \
    PORT=8080 \
    USE_FLASH_ATTENTION=True

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        libssl-dev \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Each binary gets a compute-capability suffix (-75, -80, -90) so several
# builds can coexist in /usr/local/bin.
COPY --from=builder-75 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-75
COPY --from=builder-80 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-80
COPY --from=builder-90 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-90

# Install the Google Cloud CLI in a single layer.
# The repository key is de-armored with gpg into the keyring referenced by
# `signed-by` (apt-key is deprecated), curl fails on HTTP errors (-f), and the
# APT lists are removed in the same layer so they do not bloat the image.
RUN apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        gnupg \
        curl \
    && echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
        | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list \
    && curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
        | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg \
    && apt-get update -y \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y google-cloud-sdk \
    && rm -rf /var/lib/apt/lists/*

# COPY custom entrypoint for Google
# The path is relative to the build context, and the destination to the
# image's working directory. CMD supplies the default arguments handed to
# the entrypoint script.
COPY --chmod=775 containers/tei/gpu/1.6.0/entrypoint.sh entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
CMD ["--json-output"]
48 changes: 48 additions & 0 deletions containers/tei/gpu/1.6.0/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
#
# Entrypoint for the TEI GPU container.
#
# When AIP_STORAGE_URI is a gs:// URI, its contents are first copied to
# /tmp/model with `gcloud storage cp` and MODEL_ID is exported to point at that
# directory; the script exits with status 1 if the copy fails. The script then
# queries the GPU compute capability through nvidia-smi and execs the matching
# text-embeddings-router-<cap> binary with all script arguments.

# Check if AIP_STORAGE_URI starts with "gs://"
if [[ $AIP_STORAGE_URI == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Define the target directory
    TARGET_DIR="/tmp/model"
    mkdir -p "$TARGET_DIR"

    # Use `gcloud storage cp` to copy the content from GCS to the target directory.
    # The wildcard is quoted so gcloud, not the shell, expands it.
    echo "Running: gcloud storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
    if gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Update MODEL_ID to point to the local directory
        echo "Updating MODEL_ID to point to the local directory."
        export MODEL_ID="$TARGET_DIR"
    else
        echo "Failed to download model from GCS."
        exit 1
    fi
fi

# Best effort: a failure to refresh the linker cache is reported but not fatal.
ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"

# Below is the original `cuda-all-entrypoint.sh` script.
# Reference: https://github.com/huggingface/text-embeddings-inference/blob/v1.5.1/cuda-all-entrypoint.sh
if ! command -v nvidia-smi &>/dev/null; then
    echo "Error: 'nvidia-smi' command not found."
    exit 1
fi

# Line 1 of the CSV output is the header; line 2 is the first GPU reported.
# Dots are removed so that e.g. "8.6" becomes the integer 86.
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
# Drop any stray whitespace before the numeric comparisons below.
compute_cap=${compute_cap//[[:space:]]/}

# An empty or non-numeric value (e.g. nvidia-smi failed or found no GPU) would
# make the integer tests below error out, so report it explicitly instead.
if ! [[ $compute_cap =~ ^[0-9]+$ ]]; then
    echo "Error: unable to determine the CUDA compute capability (got '${compute_cap}')."
    exit 1
fi

if [ "${compute_cap}" -eq 75 ]; then
    exec text-embeddings-router-75 "$@"
elif [ "${compute_cap}" -ge 80 ] && [ "${compute_cap}" -lt 90 ]; then
    exec text-embeddings-router-80 "$@"
elif [ "${compute_cap}" -eq 90 ]; then
    exec text-embeddings-router-90 "$@"
else
    echo "cuda compute cap ${compute_cap} is not supported"
    exit 1
fi
Loading