Patch for text-generation-inference/docker/Dockerfile.

What it does:
  * Adds a dedicated `protoc-downloader` stage that fetches the protoc v21.12
    release archive and unpacks `bin/protoc` and `include/*` under /usr/local.
  * Replaces the in-builder protoc download with two COPY --from instructions
    that pull those files out of the downloader stage.
  * Folds the builder's separate `python3.11-dev` apt-get layer into the
    apt-get layer that installs the network-check tools.
  * Removes a number of comments and blank lines; no other instruction changes.

Review adjustments relative to the patch as first submitted:
  * The downloader base image is pinned to alpine:3.20 rather than the
    implicit `latest` tag, so rebuilds stay reproducible.
  * curl runs with -f (plus -sS) so an HTTP error aborts the download step
    itself instead of handing an error page to unzip.
  * The final CMD line is left as unchanged context, so the Dockerfile keeps
    its trailing newline.

Notes for whoever applies this:
  * The post-image blob id on the `index` line predates the adjustments above.
  * Indentation of continuation lines is reconstructed as four spaces - TODO
    confirm against the target file, or apply with
    `git apply --ignore-whitespace`.

diff --git a/text-generation-inference/docker/Dockerfile b/text-generation-inference/docker/Dockerfile
index d4cb8c3..d4a7f63 100644
--- a/text-generation-inference/docker/Dockerfile
+++ b/text-generation-inference/docker/Dockerfile
@@ -6,11 +6,18 @@ RUN mkdir -p /tgi
 ADD https://github.com/huggingface/text-generation-inference/archive/${TGI_VERSION}.tar.gz /tgi/sources.tar.gz
 RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1
 
-# Build cargo components (adapted from TGI original Dockerfile)
-# Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04)
+# Protoc download stage
+FROM alpine:3.20 AS protoc-downloader
+RUN apk add --no-cache curl unzip
+RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
+    curl -fsSLO https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
+    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
+    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
+    rm -f $PROTOC_ZIP
+
+# Build cargo components
 FROM lukemathwalker/cargo-chef:latest-rust-1.79-bookworm AS chef
 WORKDIR /usr/src
-
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
 
 FROM chef AS planner
@@ -25,26 +32,19 @@ COPY --from=tgi /tgi/launcher launcher
 RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
-
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    python3.11-dev
-
-
-# Run network check before attempting downloads
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    python3.11-dev \
     iputils-ping \
     curl \
     dnsutils
+
 COPY text-generation-inference/docker/check-network.sh /usr/local/bin/check-network
 RUN chmod +x /usr/local/bin/check-network
 RUN check-network
 
-RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
-    echo "Downloading protoc..." && \
-    curl -v -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
-    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
-    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
-    rm -f $PROTOC_ZIP
+# Copy protoc from downloader stage instead of downloading again
+COPY --from=protoc-downloader /usr/local/bin/protoc /usr/local/bin/protoc
+COPY --from=protoc-downloader /usr/local/include /usr/local/include
 
 COPY --from=planner /usr/src/recipe.json recipe.json
 RUN cargo chef cook --profile release-opt --recipe-path recipe.json
@@ -61,7 +61,6 @@ RUN cargo build --profile release-opt
 
 # Python base image
 FROM ubuntu:22.04 AS base
-
 RUN apt-get update -y \
     && apt-get install -y --no-install-recommends \
     python3-pip \
@@ -77,7 +76,6 @@ RUN test -n ${VERSION:?}
 
 # Python server build image
 FROM base AS pyserver
-
 RUN apt-get update -y \
     && apt-get install -y --no-install-recommends \
     make \
@@ -92,12 +90,10 @@ COPY --from=tgi /tgi/proto proto
 RUN pip3 install -r server/build-requirements.txt
 RUN VERBOSE=1 BUILDDIR=/pyserver/build PROTODIR=/pyserver/proto VERSION=${VERSION} make -C server gen-server
 
-# TPU base image (used for deployment)
+# TPU base image
 FROM base AS tpu_base
-
 ARG VERSION=${VERSION}
 
-# Install system prerequisites
 RUN apt-get update -y \
     && apt-get install -y --no-install-recommends \
     libpython3.10 \
@@ -109,7 +105,6 @@ RUN apt-get update -y \
     && rm -rf /var/lib/apt/lists/* \
     && apt-get clean
 
-# Update pip
 RUN pip install --upgrade pip
 
 # Install HuggingFace packages
@@ -117,7 +112,6 @@ ARG TRANSFORMERS_VERSION='4.41.1'
 ARG ACCELERATE_VERSION='0.27.2'
 ARG SAFETENSORS_VERSION='0.4.2'
 
-# TGI base env
 ENV HUGGINGFACE_HUB_CACHE=/data \
     HF_HUB_ENABLE_HF_TRANSFER=1 \
     PORT=80 \
@@ -125,36 +119,26 @@ ENV HUGGINGFACE_HUB_CACHE=/data \
 
 COPY . /opt/optimum-tpu
 
-# Install requirements for TGI, that uses python3.11
 RUN python3.11 -m pip install transformers==${TRANSFORMERS_VERSION}
-
-# Install requirements for optimum-tpu, then for TGI then optimum-tpu
 RUN python3 -m pip install hf_transfer safetensors==${SAFETENSORS_VERSION} typer
 RUN python3 /opt/optimum-tpu/optimum/tpu/cli.py install-jetstream-pytorch --yes
 RUN python3 -m pip install -e /opt/optimum-tpu \
     -f https://storage.googleapis.com/libtpu-releases/index.html
 
-# Install benchmarker
+# Install components from builder
 COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
-# Install router
 COPY --from=builder /usr/src/target/release-opt/text-generation-router-v2 /usr/local/bin/text-generation-router
-# Install launcher
 COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
-# Install python server
 COPY --from=pyserver /pyserver/build/dist dist
 RUN pip install dist/text_generation_server*.tar.gz
-
 
 # TPU compatible image for Inference Endpoints
 FROM tpu_base AS inference-endpoint
-
 COPY text-generation-inference/docker/entrypoint.sh entrypoint.sh
 RUN chmod +x entrypoint.sh
-
 ENTRYPOINT ["./entrypoint.sh"]
 
 # TPU compatible image
 FROM tpu_base
-
 ENTRYPOINT ["text-generation-launcher"]
 CMD ["--json-output"]