From f0209ad56579be3055c6822973a40d83a43f8540 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
Date: Fri, 6 Dec 2024 19:16:37 +0100
Subject: [PATCH] Add Ray and `python-json-logger` for Ray DLC for Training

---
 .../4.42.3/ray/2.33.0/py310/Dockerfile        | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile

diff --git a/containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile b/containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile
new file mode 100644
index 00000000..4b958a85
--- /dev/null
+++ b/containers/pytorch/training/gpu/2.3.0/transformers/4.42.3/ray/2.33.0/py310/Dockerfile
@@ -0,0 +1,83 @@
+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
+LABEL maintainer="Hugging Face"
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Build arguments: CUDA wheel tag and library versions read by the pip steps below; MAX_JOBS is passed to the flash-attn install
+ARG CUDA="cu121"
+ARG PYTORCH="2.3.0"
+ARG FLASH_ATTN="2.5.8"
+ARG TRANSFORMERS="4.42.3"
+ARG DIFFUSERS="0.28.0"
+ARG PEFT="0.11.1"
+ARG TRL="0.9.6"
+ARG BITSANDBYTES="0.43.1"
+ARG DATASETS="2.19.1"
+ARG ACCELERATE="0.30.1"
+ARG EVALUATE="0.4.2"
+ARG SENTENCE_TRANSFORMERS="2.7.0"
+ARG DEEPSPEED="0.14.2"
+ARG MAX_JOBS=4
+
+# System packages (deadsnakes PPA, toolchain, Python 3.10, media libs); APT lists are removed in this same layer to keep it small
+RUN apt-get update && apt-get install -y software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get -y upgrade --only-upgrade systemd openssl cryptsetup && \
+    apt-get install -y \
+    build-essential \
+    bzip2 \
+    cmake \
+    curl \
+    ffmpeg \
+    g++ \
+    gcc \
+    git \
+    git-lfs \
+    libaio-dev \
+    libprotobuf-dev \
+    libsndfile1-dev \
+    protobuf-compiler \
+    python3-dev \
+    python3-pip \
+    python3.10 \
+    tar \
+    && apt-get autoremove --yes && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip itself; --no-cache-dir keeps pip's download cache out of the image layer
+RUN pip install --no-cache-dir --upgrade pip
+
+# Install the pinned PyTorch build first (PyTorch must be installed before any DeepSpeed C++/CUDA ops)
+RUN pip install --upgrade --no-cache-dir --extra-index-url https://download.pytorch.org/whl/${CUDA} torch==${PYTORCH} torchvision torchaudio
+
+# Install FlashAttention 2 without build isolation so its build sees the torch, packaging and ninja already installed
+RUN pip install --no-cache-dir packaging ninja
+RUN MAX_JOBS=${MAX_JOBS} pip install --no-cache-dir --no-build-isolation flash-attn==${FLASH_ATTN}
+
+# Install Hugging Face libraries at the ARG-pinned versions (extras spec quoted so the shell never treats [...] as a glob)
+RUN pip install --upgrade --no-cache-dir \
+    "transformers[sklearn,sentencepiece,vision]==${TRANSFORMERS}" \
+    diffusers==${DIFFUSERS} \
+    datasets==${DATASETS} \
+    accelerate==${ACCELERATE} \
+    evaluate==${EVALUATE} \
+    peft==${PEFT} \
+    trl==${TRL} \
+    sentence-transformers==${SENTENCE_TRANSFORMERS} \
+    deepspeed==${DEEPSPEED} \
+    bitsandbytes==${BITSANDBYTES} \
+    tensorboard \
+    jupyter notebook
+
+# Ray and python-json-logger at fixed versions for the Ray-on-Vertex-AI custom image; requirements are documented at:
+# https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/create-cluster#custom-image
+RUN pip install --no-cache-dir --upgrade "ray[all]==2.33.0" "python-json-logger==2.0.7"
+
+# Install the Google Cloud client libraries (unpinned) and constrain protobuf to versions below 4.0.0
+RUN pip install --upgrade --no-cache-dir \
+    google-cloud-storage \
+    google-cloud-bigquery \
+    google-cloud-aiplatform \
+    google-cloud-pubsub \
+    google-cloud-logging \
+    "protobuf<4.0.0"