diff --git a/.github/workflows/build-and-push-nightly-pytorch-compile-clang.yml b/.github/workflows/build-and-push-nightly-pytorch-compile-clang.yml
new file mode 100644
index 0000000..c47b77d
--- /dev/null
+++ b/.github/workflows/build-and-push-nightly-pytorch-compile-clang.yml
@@ -0,0 +1,31 @@
+name: build-and-push-nightly-pytorch-compile-clang
+
+on:
+  schedule:
+    # Runs nightly at 03:00; GitHub evaluates cron schedules in UTC (see https://crontab.guru)
+    - cron: '0 3 * * *'
+  # allows starting the build by hand from the Actions tab
+  workflow_dispatch:
+
+# the image is pushed with the registry secrets used below, so the GITHUB_TOKEN only has to read the repository
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    environment: image-build-and-push
+    steps:
+      # docker login source https://github.com/docker/login-action
+      - uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          # Note: these values can be configured under https://github.com/foundation-model-stack/base-images/settings/environments/614355731/edit
+          username: ${{ secrets.GH_REGISTRY_USER }}
+          password: ${{ secrets.GH_REGISTRY_PUSH_TOKEN }}
+
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v4
+
+      - name: build and push
+        run: ./nightly-pytorch-compile-clang/buildAndPush.sh
diff --git a/nightly-pytorch-compile-clang/Dockerfile b/nightly-pytorch-compile-clang/Dockerfile
new file mode 100644
index 0000000..eaea1cc
--- /dev/null
+++ b/nightly-pytorch-compile-clang/Dockerfile
@@ -0,0 +1,80 @@
+# switch to miniconda3 base as pytorch base uses python3.7
+FROM continuumio/miniconda3:latest
+
+ENV HOME=/homedir \
+    AIM_UI_TELEMETRY_ENABLED=0
+
+# tools: debian packages first, then the CUDA 12.1 compiler and Nsight Systems from the nvidia repository
+# the keyring .deb and the apt package lists are removed inside this same RUN, as deleting them in a
+# later instruction would not shrink the image (they would already be stored in this layer)
+# Acquire::Max-FutureTime=86400 lets apt accept repository metadata dated up to one day in the future
+RUN apt-get -y -o Acquire::Max-FutureTime=86400 update && \
+    apt-get install -y gnupg vim nmon clang software-properties-common && \
+    apt-key del 7fa2af80 && \
+    add-apt-repository contrib && \
+    wget https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.0-1_all.deb && \
+    dpkg -i cuda-keyring_1.0-1_all.deb && \
+    rm -f cuda-keyring_1.0-1_all.deb && \
+    apt-get -y -o Acquire::Max-FutureTime=86400 update && \
+    apt-get install -y cuda-compiler-12-1 cuda-nsight-systems-12-1 && \
+    apt-get -y clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# hand /opt/conda to group 0, make it writable and copy the owner permission bits to the group
+RUN chgrp -R 0 /opt/conda && \
+    chmod -R a+w /opt/conda && \
+    chmod -R g=u /opt/conda
+
+# This is important for certain pytorch deps
+# (the chmod calls after every pip install re-apply the group permissions to the files pip just wrote)
+RUN pip install --upgrade setuptools --no-cache-dir && \
+    chmod -R g+w /opt/conda && \
+    chmod -R g=u /opt/conda
+
+# Pytorch nightly install
+# NOTE(review): these wheels come from the cu118 index while the toolkit installed above is CUDA 12.1 - confirm this mix is intended
+RUN pip install --pre --upgrade torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cu118 --no-cache-dir && \
+    chmod -R g+w /opt/conda && \
+    chmod -R g=u /opt/conda
+
+# permanent dependencies, we put them after torch so we don't end up installing torch twice and wasting a lot of space
+# (each install of torch is about 2GB!!!)
+# decision has been made to have them as the latest version always, as this is a nightly image after all
+RUN pip install \
+    transformers \
+    aim \
+    datasets \
+    accelerate \
+    optimum \
+    tqdm \
+    protobuf \
+    colorama \
+    scikit-learn \
+    ninja \
+    --no-cache-dir && \
+    chmod -R g+w /opt/conda && \
+    chmod -R g=u /opt/conda
+
+# xformers install (built from source)
+# deactivated until we get better workers as there isn't enough ram, cpu, or disk space to compile them anymore
+# note: the tools step deletes the apt package lists, so put an apt-get update in front of the apt-get install below when re-enabling this
+# ENV CUDA_HOME=/usr/local/cuda
+# ENV MAX_JOBS=2
+# ENV TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6"
+# RUN apt-get install -y libcublas-11-8 libcublas-dev-11-8 libcusparse-11-8 libcusparse-dev-11-8 libcusolver-11-8 libcusolver-dev-11-8 libcurand-11-8 libcurand-dev-11-8 && \
+#     pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers --no-cache-dir && \
+#     chmod -R g+w /opt/conda && \
+#     chmod -R g=u /opt/conda && \
+#     apt-get remove -y libcublas-11-8 libcublas-dev-11-8 libcusparse-11-8 libcusparse-dev-11-8 libcusolver-11-8 libcusolver-dev-11-8 libcurand-11-8 libcurand-dev-11-8 && \
+#     apt-get -y clean
+
+# setup .aim_profile (needed to get aim running)
+RUN mkdir -p "${HOME}" && \
+    touch ~/.aim_profile && \
+    chmod g+w ~/.aim_profile
+
+WORKDIR /workspace
+
+# same group 0 treatment for the working directory
+RUN chgrp -R 0 . && \
+    chmod -R g=u .
diff --git a/nightly-pytorch-compile-clang/buildAndPush.sh b/nightly-pytorch-compile-clang/buildAndPush.sh
new file mode 100755
index 0000000..8897ce7
--- /dev/null
+++ b/nightly-pytorch-compile-clang/buildAndPush.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# builds the image from the Dockerfile next to this script and pushes it to the registry
+# override the image tag with an optional command line argument: ./buildAndPush.sh <tag>
+# otherwise it will be tagged with pytorch-compile-clang-latest-nightly-<YYYYMMDD>
+
+# stop at the first failing command (or unset variable), so a failed build is never followed by a push
+set -euo pipefail
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+REGISTRY='ghcr.io'
+NAMESPACE='foundation-model-stack'
+NAME='base'
+
+cd "${SCRIPT_DIR}"
+
+TIMESTAMP=$(date "+%Y%m%d")
+
+# note: docker tags must be valid ASCII and may contain lowercase and uppercase letters, digits, underscores, periods and dashes.
+# A tag name may not start with a period or a dash and may contain a maximum of 128 characters
+VERSION="${1:-pytorch-compile-clang-latest-nightly-$TIMESTAMP}"
+# drop every character that is not allowed in a tag (this includes whitespace and backslashes)
+VERSION="${VERSION//[^[:alnum:]._-]/}"
+# drop leading periods and dashes
+while [[ "${VERSION}" == [-.]* ]]; do
+    VERSION="${VERSION#?}"
+done
+# keep at most 128 characters
+VERSION="${VERSION:0:128}"
+
+if [[ -z "${VERSION}" ]]; then
+    echo "error: no valid characters left in the requested image tag" >&2
+    exit 1
+fi
+
+TAG="${REGISTRY}/${NAMESPACE}/${NAME}:${VERSION}"
+
+docker build -t "${TAG}" .
+
+docker push "${TAG}"