From f0209ad56579be3055c6822973a40d83a43f8540 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Fri, 6 Dec 2024 19:16:37 +0100
Subject: [PATCH] Add Ray and `python-json-logger` for Ray DLC for Training

---
 .../4.42.3/ray/2.33.0/py310/Dockerfile        | 93 +++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile

diff --git a/containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile b/containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile
new file mode 100644
index 00000000..4b958a85
--- /dev/null
+++ b/containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile
@@ -0,0 +1,93 @@
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
+LABEL maintainer="Hugging Face"
+# Build-time only: keeps apt from prompting; ARG values are not persisted in the runtime environment
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Versions (override any of them with `docker build --build-arg NAME=value`)
+ARG CUDA="cu121"
+ARG PYTORCH="2.3.0"
+ARG FLASH_ATTN="2.5.8"
+ARG TRANSFORMERS="4.42.3"
+ARG DIFFUSERS="0.28.0"
+ARG PEFT="0.11.1"
+ARG TRL="0.9.6"
+ARG BITSANDBYTES="0.43.1"
+ARG DATASETS="2.19.1"
+ARG ACCELERATE="0.30.1"
+ARG EVALUATE="0.4.2"
+ARG SENTENCE_TRANSFORMERS="2.7.0"
+ARG DEEPSPEED="0.14.2"
+ARG RAY="2.33.0"
+ARG PYTHON_JSON_LOGGER="2.0.7"
+# Exported as MAX_JOBS to the flash-attn install step below
+ARG MAX_JOBS=4
+
+# Install system dependencies and clean up in the same layer.
+# Only the apt package lists are removed: /var/lib/dpkg has to stay so that
+# packages can still be installed or removed in images derived from this one.
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
+    apt-get install -y \
+        build-essential \
+        bzip2 \
+        cmake \
+        curl \
+        ffmpeg \
+        g++ \
+        gcc \
+        git \
+        git-lfs \
+        libaio-dev \
+        libprotobuf-dev \
+        libsndfile1-dev \
+        protobuf-compiler \
+        python3-dev \
+        python3-pip \
+        python3.10 \
+        tar && \
+    apt-get autoremove -y && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Update pip
+RUN pip install --no-cache-dir --upgrade pip
+
+# Install PyTorch (PyTorch must be installed before any DeepSpeed c++/cuda ops.)
+RUN pip install --no-cache-dir -U torch==${PYTORCH} torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/${CUDA}
+
+# Install FlashAttention-2. It is installed with --no-build-isolation, so its
+# build helpers (packaging, ninja) are installed into the environment first.
+RUN pip install --no-cache-dir packaging ninja
+RUN MAX_JOBS=${MAX_JOBS} pip install --no-cache-dir flash-attn==${FLASH_ATTN} --no-build-isolation
+
+# Install Hugging Face Libraries
+RUN pip install --upgrade --no-cache-dir \
+    "transformers[sklearn,sentencepiece,vision]==${TRANSFORMERS}" \
+    diffusers==${DIFFUSERS} \
+    datasets==${DATASETS} \
+    accelerate==${ACCELERATE} \
+    evaluate==${EVALUATE} \
+    peft==${PEFT} \
+    trl==${TRL} \
+    sentence-transformers==${SENTENCE_TRANSFORMERS} \
+    deepspeed==${DEEPSPEED} \
+    bitsandbytes==${BITSANDBYTES} \
+    tensorboard \
+    jupyter \
+    notebook
+
+# Install Ray and python-json-logger
+# https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/create-cluster#custom-image
+RUN pip install --upgrade --no-cache-dir "ray[all]==${RAY}" "python-json-logger==${PYTHON_JSON_LOGGER}"
+
+# Install Google Cloud Dependencies
+RUN pip install --upgrade --no-cache-dir \
+    google-cloud-storage \
+    google-cloud-bigquery \
+    google-cloud-aiplatform \
+    google-cloud-pubsub \
+    google-cloud-logging \
+    "protobuf<4.0.0"