-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
pytorch/inference/gpu/2.3.1/transformers/4.46.0/py311
(WIP)
- Include missing `requirements.txt` installation in `entrypoint.sh` (required to install custom dependencies with custom models) - Fix the Python 3.11 installation, as it was not properly installed and Python 3.10 was being used instead - Use `uv` to install the dependencies, as it is much faster than the default `pip` - `uv` is also able to successfully install `kenlm`, a `transformers` dependency that `pip` fails to install when building the `Dockerfile` - Tested with some of the latest models that the bumped dependencies support, such as Gemma 2, Llama 3.2, Stable Diffusion 3.5, and more
- Loading branch information
1 parent
631b103
commit b6e102e
Showing
2 changed files
with
109 additions
and
0 deletions.
There are no files selected for viewing
65 changes: 65 additions & 0 deletions
65
containers/pytorch/inference/gpu/2.3.1/transformers/4.46.0/py311/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# Base: CUDA 12.1 devel image on Ubuntu 22.04 (devel variant is required so
# Python packages that compile CUDA extensions at install time can build).
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

# pipefail so that `curl | …` pipelines below fail the build when curl fails.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

LABEL maintainer="Hugging Face"

# NOTE(review): this persists into the runtime environment; it only affects
# debconf prompts, but an ARG or inline assignment would be cleaner.
ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Install system build tools and Python 3.11 from the deadsnakes PPA
# (Ubuntu 22.04 ships Python 3.10 by default). `gnupg` is needed further
# below to de-armor the Google Cloud apt signing key. Packages are sorted
# alphabetically for diffability.
RUN apt-get update && \
    apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
    apt-get install -y --no-install-recommends \
        build-essential \
        bzip2 \
        cmake \
        curl \
        ffmpeg \
        g++ \
        gcc \
        git \
        git-lfs \
        gnupg \
        libprotobuf-dev \
        libsndfile1-dev \
        protobuf-compiler \
        python3.11 \
        python3.11-dev \
        tar && \
    rm -rf /var/lib/apt/lists/*

# Make Python 3.11 the default `python3` and `python` interpreter (otherwise
# the system Python 3.10 keeps being picked up).
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1 && \
    ln -sf /usr/bin/python3.11 /usr/bin/python

# Install pip for Python 3.11 from the official bootstrap script (deadsnakes
# does not ship a matching pip package). `-fsSL` makes curl fail on HTTP
# errors instead of silently saving an error page.
RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# `uv` is used instead of plain pip: it is faster and is able to build the
# `kenlm` dependency of `transformers` that pip fails to install.
RUN pip install --no-cache-dir --upgrade uv && \
    uv pip install --upgrade setuptools --no-cache-dir --system

# Hugging Face Inference Toolkit; the git ref is overridable at build time
# via --build-arg HF_INFERENCE_TOOLKIT_VERSION=<tag-or-branch>.
ARG HF_INFERENCE_TOOLKIT_VERSION=bump-dependencies
ARG HF_INFERENCE_TOOLKIT_URL=git+https://github.com/huggingface/huggingface-inference-toolkit.git@${HF_INFERENCE_TOOLKIT_VERSION}
RUN uv pip install --upgrade "${HF_INFERENCE_TOOLKIT_URL}#egg=huggingface-inference-toolkit[torch,diffusers,st,google]" --no-cache-dir --system

# Speed up model downloads from the Hugging Face Hub.
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Install the Google Cloud SDK, needed by the entrypoint to fetch models from
# GCS via `gsutil`. `apt-key add` is deprecated on Ubuntu 22.04, so the key is
# written directly into the keyring referenced by `signed-by` instead.
RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
    | tee /etc/apt/sources.list.d/google-cloud-sdk.list && \
    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
    | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
    apt-get update -y && \
    apt-get install -y --no-install-recommends google-cloud-sdk && \
    apt-get clean autoremove --yes && \
    rm -rf /var/lib/{apt,dpkg,cache,log}

# Copy the entrypoint (already executable via --chmod) and run it directly in
# exec form: no intermediate `bash -c` shell is needed, and the script's own
# shebang selects bash.
COPY --chmod=0755 containers/pytorch/inference/gpu/2.3.1/transformers/4.46.0/py311/entrypoint.sh entrypoint.sh
ENTRYPOINT ["./entrypoint.sh"]
44 changes: 44 additions & 0 deletions
44
containers/pytorch/inference/gpu/2.3.1/transformers/4.46.0/py311/entrypoint.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash

# Entrypoint for the Hugging Face PyTorch inference container.
# Optionally downloads the model artifacts from Google Cloud Storage (Vertex
# AI), installs any custom requirements shipped with the model, then starts
# the inference webservice.

# Default port for the standalone container; overridden on Vertex AI below.
PORT=5000

# When running on Vertex AI, AIP_MODE is set and the HTTP port is provided
# through AIP_HTTP_PORT.
if [[ -n "${AIP_MODE}" ]]; then
    PORT=${AIP_HTTP_PORT}
fi

# If AIP_STORAGE_URI points at a GCS bucket ("gs://..."), download the model
# locally before starting the server.
if [[ $AIP_STORAGE_URI == gs://* ]]; then
    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"

    # Local directory that will hold the downloaded model artifacts.
    TARGET_DIR="/opt/huggingface/model"
    mkdir -p "$TARGET_DIR"

    # Copy the bucket contents into the target directory, testing the command
    # directly instead of inspecting $? afterwards.
    echo "Running: gsutil -m cp -e -r \"$AIP_STORAGE_URI/*\" \"$TARGET_DIR\""
    if gsutil -m cp -e -r "$AIP_STORAGE_URI/*" "$TARGET_DIR"; then
        echo "Model downloaded successfully to ${TARGET_DIR}."
        # Point the toolkit at the local copy and clear the remote URI so the
        # model is not downloaded a second time.
        echo "Updating MODEL_ID to point to the local directory."
        export HF_MODEL_DIR="$TARGET_DIR"
        export AIP_STORAGE_URI=""
    else
        echo "Failed to download model from GCS."
        exit 1
    fi

    # Install custom dependencies shipped alongside the model, if any.
    if [ -f "${HF_MODEL_DIR}/requirements.txt" ]; then
        echo "Installing custom dependencies from ${HF_MODEL_DIR}/requirements.txt"
        uv pip install -r "${HF_MODEL_DIR}/requirements.txt" --no-cache-dir --system
    fi
fi

# Start the server. `exec` replaces this shell so uvicorn becomes PID 1 and
# receives signals (e.g. SIGTERM from `docker stop`) from the runtime.
exec uvicorn huggingface_inference_toolkit.webservice_starlette:app --host 0.0.0.0 --port "${PORT}"