From 7522fa87d6cb0a6ed20ce4d92dd44a308f98ab9f Mon Sep 17 00:00:00 2001 From: ekoc Date: Thu, 19 Dec 2024 11:09:03 +0100 Subject: [PATCH] Adding docker build for DeepSpeed --- .gitignore | 5 +- Dockerfile | 61 +++++++++++++++------ deepspeed/Dockerfile | 103 +++++++++++++++++++++++++++++++++++ deepspeed/build-deepspeed.sh | 65 ++++++++++++++++++++++ docker-build.sh | 32 +++++++++++ 5 files changed, 249 insertions(+), 17 deletions(-) create mode 100644 deepspeed/Dockerfile create mode 100755 deepspeed/build-deepspeed.sh diff --git a/.gitignore b/.gitignore index 563c2279..4428008c 100644 --- a/.gitignore +++ b/.gitignore @@ -170,4 +170,7 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/* \ No newline at end of file +.idea/* + +# Deepspeed build +deepspeed/build \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 69ac1374..d8655100 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,12 +4,22 @@ FROM continuumio/miniconda3:24.7.1-0 ARG TTS_MODEL="xtts" ENV TTS_MODEL=$TTS_MODEL +ARG CUDA_VERSION="12.1.1" +ENV CUDA_VERSION=$CUDA_VERSION + +ARG PYTHON_VERSION=3.11.9 +ENV PYTHON_VERSION=$PYTHON_VERSION + +ARG PYTORCH_VERSION=2.2.1 +ENV PYTORCH_VERSION=$PYTORCH_VERSION + SHELL ["/bin/bash", "-l", "-c"] ENV SHELL=/bin/bash ENV HOST=0.0.0.0 ENV DEBIAN_FRONTEND=noninteractive ENV CUDA_DOCKER_ARCH=all ENV GRADIO_SERVER_NAME="0.0.0.0" +ENV NVIDIA_VISIBLE_DEVICES=all RUN <&1 ) + + if echo $RESULT | grep -izq error ; then + echo "Failed to install conda dependencies 2: $RESULT" + exit 1 + fi conda clean -a && pip cache purge EOR @@ -61,22 +78,34 @@ RUN <&1 ) + + if echo $RESULT | grep -izq error ; then + echo "Failed to install pip dependencies: $RESULT" + exit 1 + fi + rm ${DEEPSPEED_WHEEL} conda clean --all --force-pkgs-dirs -y && pip cache purge EOR -# Deepspeed requires cutlass: -RUN git clone --depth 1 --branch "v3.5.1" https://github.com/NVIDIA/cutlass /alltalk/cutlass -ENV CUTLASS_PATH=/alltalk/cutlass +### Deepspeed requires cutlass: +###RUN git clone --depth 1 --branch "v3.5.1" https://github.com/NVIDIA/cutlass /alltalk/cutlass +###ENV CUTLASS_PATH=/alltalk/cutlass # Writing scripts to start alltalk: RUN < build_deepspeed.sh +#!/usr/bin/env bash +mkdir -p /deepspeed +cd ${STAGE_DIR}/DeepSpeed +DS_BUILD_OPS=1 python setup.py build_ext -j8 bdist_wheel +mv ${STAGE_DIR}/DeepSpeed/dist/*.whl /deepspeed/ +EOF +EOR + +RUN chmod +x /build_deepspeed.sh + +ENTRYPOINT ["/build_deepspeed.sh"] \ No newline at end of file diff --git a/deepspeed/build-deepspeed.sh b/deepspeed/build-deepspeed.sh new file mode 100755 index 00000000..35977bba --- /dev/null +++ b/deepspeed/build-deepspeed.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +CUDA_VERSION=12.1.1 +PYTHON_VERSION=3.11 +PYTORCH_VERSION=2.2.1 +DEEPSPEED_VERSION=0.16.1 + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR + +# Parse arguments +while [ "$#" -gt 0 ]; do + case "$1" in + --cuda-version) + CUDA_VERSION="$2" + shift + ;; + --python-version) + PYTHON_VERSION="$2" + shift + ;; + --pytorch-version) + PYTORCH_VERSION="$2" + shift + ;; + --deepspeed-version) + DEEPSPEED_VERSION="$2" + shift + ;; + *) + # Allow to pass arbitrary arguments to docker as well to be flexible: + echo "Unknown argument '$1'" + exit 1 + ;; + esac + shift +done + +PYTHON_VERSION_NO_DOT=${PYTHON_VERSION//./} +if [[ -n $(find build -name "deepspeed-${DEEPSPEED_VERSION}*-cp${PYTHON_VERSION_NO_DOT}-cp${PYTHON_VERSION_NO_DOT}-*.whl") ]] +then + echo "DeepSpeed was already built - skipping..." + exit 0 +fi + +echo "Building DeepSpeed $DEEPSPEED_VERSION for CUDA $CUDA_VERSION using python ${PYTHON_VERSION} with PyTorch ${PYTORCH_VERSION}" + +rm -rf build # make sure to properly clean up - we only want 1 wheel at the time +mkdir -p build +docker buildx \ + build \ + --build-arg CUDA_VERSION=$CUDA_VERSION \ + --build-arg PYTHON_VERSION=$PYTHON_VERSION \ + --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \ + --build-arg DEEPSPEED_VERSION=$DEEPSPEED_VERSION \ + -t deepspeed:cu-$CUDA_VERSION-ds-$DEEPSPEED_VERSION \ + . + +docker run \ + --rm \ + -it \ + --gpus=all \ + --name deepspeed \ + -v $SCRIPT_DIR/build:/deepspeed \ + deepspeed:cu-$CUDA_VERSION-ds-$DEEPSPEED_VERSION \ No newline at end of file diff --git a/docker-build.sh b/docker-build.sh index bb60d10a..3c0e1aaa 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -1,11 +1,26 @@ #!/usr/bin/env bash TTS_MODEL=xtts +CUDA_VERSION=12.1.1 +PYTHON_VERSION=3.11.9 +PYTORCH_VERSION=2.2.1 DOCKER_TAG=latest # Parse arguments while [ "$#" -gt 0 ]; do case "$1" in + --cuda-version) + CUDA_VERSION="$2" + shift + ;; + --python-version) + PYTHON_VERSION="$2" + shift + ;; + --pytorch-version) + PYTORCH_VERSION="$2" + shift + ;; --tts_model) TTS_MODEL="$2" shift @@ -22,11 +37,28 @@ while [ "$#" -gt 0 ]; do shift done +echo "$PYTHON_VERSION -> ${PYTHON_VERSION%.*}" + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR + +PYTHON_MAJOR_MINOR=${PYTHON_VERSION%.*} +$SCRIPT_DIR/deepspeed/build-deepspeed.sh \ + --cuda-version ${CUDA_VERSION} \ + --python-version ${PYTHON_MAJOR_MINOR} \ + --pytorch-version ${PYTORCH_VERSION} + echo "Starting docker build process using TTS model '${TTS_MODEL}' and docker tag '${DOCKER_TAG}'" +echo "Building for CUDA $CUDA_VERSION using python ${PYTHON_VERSION} with PyTorch ${PYTORCH_VERSION}" + docker buildx \ build \ + --progress=plain \ --build-arg TTS_MODEL=$TTS_MODEL \ + --build-arg CUDA_VERSION=$CUDA_VERSION \ + --build-arg PYTHON_VERSION=$PYTHON_VERSION \ + --build-arg PYTORCH_VERSION=$PYTORCH_VERSION \ -t alltalk_beta:${DOCKER_TAG} \ .