diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh
new file mode 100644
index 000000000..2e5adcd6a
--- /dev/null
+++ b/.github/scripts/build.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+python_executable=python$1
+cuda_home=/usr/local/cuda-$2
+
+# Update paths
+PATH=${cuda_home}/bin:$PATH
+LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
+
+# Install requirements
+$python_executable -m pip install wheel packaging
+
+# Limit the number of parallel jobs to avoid OOM
+export MAX_JOBS=2
+# Make sure release wheels are built for the following architectures
+export TORCH_CUDA_ARCH_LIST="8.0 8.6 8.9 9.0+PTX"
+# Build
+$python_executable setup.py bdist_wheel --dist-dir=dist
diff --git a/.github/scripts/create_release.js b/.github/scripts/create_release.js
new file mode 100644
index 000000000..475742118
--- /dev/null
+++ b/.github/scripts/create_release.js
@@ -0,0 +1,20 @@
+// Uses GitHub's API to create the release and wait for the result.
+// We use a JS script since the GitHub CLI doesn't provide a way to wait for the release's creation and returns immediately.
+
+module.exports = async (github, context, core) => {
+    try {
+        const response = await github.rest.repos.createRelease({
+            draft: false,
+            generate_release_notes: true,
+            name: process.env.RELEASE_TAG,
+            owner: context.repo.owner,
+            prerelease: true,
+            repo: context.repo.repo,
+            tag_name: process.env.RELEASE_TAG,
+        });
+
+        core.setOutput('upload_url', response.data.upload_url);
+    } catch (error) {
+        core.setFailed(error.message);
+    }
+}
\ No newline at end of file
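For context, the `createRelease` call in create_release.js maps onto GitHub's REST endpoint `POST /repos/{owner}/{repo}/releases`. A rough curl equivalent is sketched below; the repo slug and token are placeholders, and `RELEASE_TAG` is the same environment variable the workflow passes in.

```bash
# Rough curl equivalent of the createRelease call in create_release.js (sketch only).
# OWNER/REPO and GITHUB_TOKEN are placeholders; RELEASE_TAG mirrors the workflow env var.
curl -s -X POST \
  -H "Authorization: Bearer ${GITHUB_TOKEN}" \
  -H "Accept: application/vnd.github+json" \
  https://api.github.com/repos/OWNER/REPO/releases \
  -d "{\"tag_name\": \"${RELEASE_TAG}\", \"name\": \"${RELEASE_TAG}\", \"draft\": false, \"prerelease\": true, \"generate_release_notes\": true}"
```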
diff --git a/.github/scripts/cuda-install.sh b/.github/scripts/cuda-install.sh
new file mode 100644
index 000000000..312c6e82f
--- /dev/null
+++ b/.github/scripts/cuda-install.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Replace '.' with '-' ex: 11.8 -> 11-8
+cuda_version=$(echo $1 | tr "." "-")
+# Removes '-' and '.' ex: ubuntu-20.04 -> ubuntu2004
+OS=$(echo $2 | tr -d ".\-")
+
+# Installs CUDA
+wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-keyring_1.1-1_all.deb
+sudo dpkg -i cuda-keyring_1.1-1_all.deb
+rm cuda-keyring_1.1-1_all.deb
+sudo apt -qq update
+sudo apt -y install cuda-${cuda_version} cuda-nvcc-${cuda_version} cuda-libraries-dev-${cuda_version}
+sudo apt clean
+
+# Test nvcc
+PATH=/usr/local/cuda-$1/bin:${PATH}
+nvcc --version
+
+# Log gcc, g++, c++ versions
+gcc --version
+g++ --version
+c++ --version
diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh
new file mode 100644
index 000000000..d7baaecbb
--- /dev/null
+++ b/.github/scripts/env.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# This file installs common Linux environment tools
+
+export LANG=C.UTF-8
+
+# python_version=$1
+
+sudo apt-get update && \
+sudo apt-get install -y --no-install-recommends \
+    software-properties-common
+
+sudo apt-get install -y --no-install-recommends \
+    build-essential \
+    apt-utils \
+    ca-certificates \
+    wget \
+    git \
+    vim \
+    libssl-dev \
+    curl \
+    unzip \
+    unrar \
+    cmake \
+    net-tools \
+    sudo \
+    autotools-dev \
+    rsync \
+    jq \
+    openssh-server \
+    tmux \
+    screen \
+    htop \
+    pdsh \
+    openssh-client \
+    lshw \
+    dmidecode \
+    util-linux \
+    automake \
+    autoconf \
+    libtool \
+    net-tools \
+    pciutils \
+    libpci-dev \
+    libaio-dev \
+    libcap2 \
+    libtinfo5 \
+    fakeroot \
+    devscripts \
+    debhelper \
+    nfs-common
+
+# Remove GitHub bloat files to free up disk space
+sudo rm -rf "/usr/local/share/boost"
+sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+sudo rm -rf "/usr/share/dotnet"
diff --git a/.github/scripts/pytorch-install.sh b/.github/scripts/pytorch-install.sh
new file mode 100644
index 000000000..dfc1851d7
--- /dev/null
+++ b/.github/scripts/pytorch-install.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+python_executable=python$1
+pytorch_version=$2
+cuda_version=$3
+
+# Install torch
+$python_executable -m pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools && conda clean -ya
+$python_executable -m pip install torch==${pytorch_version}+cu${cuda_version//./} --extra-index-url https://download.pytorch.org/whl/cu${cuda_version//./}
+
+# Print version information
+$python_executable --version
+$python_executable -c "import torch; print('PyTorch:', torch.__version__)"
+$python_executable -c "import torch; print('CUDA:', torch.version.cuda)"
+$python_executable -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
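The workflow change below wires these four scripts together. For a single matrix entry, the wheel job is roughly equivalent to the following sequence (a sketch assuming Python 3.10, CUDA 11.8, and PyTorch 2.3.0 on an ubuntu-20.04 host; the real workflow fans this out across the full matrix):

```bash
# Approximate replay of one wheel job from the workflow below.
# The version arguments are one example matrix entry, not the full build matrix.
bash -x .github/scripts/env.sh                              # system packages, then free up runner disk space
bash -x .github/scripts/cuda-install.sh 11.8 ubuntu-20.04   # CUDA toolkit, nvcc, and dev libraries
bash -x .github/scripts/pytorch-install.sh 3.10 2.3.0 11.8  # torch wheel matching the CUDA version
bash -x .github/scripts/build.sh 3.10 11.8                  # build the release wheel into ./dist
```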
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index c3a5c659d..55e1f2f30 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,206 +1,99 @@
-# This workflow will:
-# - Create a new Github release
-# - Build wheels for supported architectures
-# - Deploy the wheels to the Github release
-# - Release the static code to PyPi
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+# This workflow will upload a Python package as a release asset
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions

-name: Build wheels and deploy
+name: Create Release

 on:
-  create:
+  push:
     tags:
       - v*

-jobs:
+# Needed to create release and upload assets
+permissions:
+  contents: write

-  setup_release:
+jobs:
+  release:
+    # Retrieve tag and create release
     name: Create Release
     runs-on: ubuntu-latest
+    outputs:
+      upload_url: ${{ steps.create_release.outputs.upload_url }}
     steps:
-      - name: Get the tag version
-        id: extract_branch
-        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Extract branch info
         shell: bash
+        run: |
+          echo "release_tag=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV

       - name: Create Release
         id: create_release
-        uses: actions/create-release@v1
+        uses: "actions/github-script@v6"
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          RELEASE_TAG: ${{ env.release_tag }}
         with:
-          tag_name: ${{ steps.extract_branch.outputs.branch }}
-          release_name: ${{ steps.extract_branch.outputs.branch }}
+          github-token: "${{ secrets.GITHUB_TOKEN }}"
+          script: |
+            const script = require('.github/scripts/create_release.js')
+            await script(github, context, core)

-  build_wheels:
+  wheel:
     name: Build Wheel
-    needs: setup_release
     runs-on: ${{ matrix.os }}
+    needs: release

     strategy:
       fail-fast: false
       matrix:
-        # Using ubuntu-20.04 instead of 22.04 for more compatibility (glibc). Ideally we'd use the
-        # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
-        os: [ubuntu-20.04]
+        os: ['ubuntu-20.04']
         python-version: ['3.8', '3.9', '3.10', '3.11']
-        torch-version: ['2.3.0']
-        cuda-version: ['11.8.0', '12.1.1']
-        # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
-        # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
-        # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
-        # when building without C++11 ABI and using it on nvcr images.
-        cxx11_abi: ['FALSE', 'TRUE']
+        pytorch-version: ['2.3.0']  # Must be the most recent version that meets requirements-cuda.txt.
+        cuda-version: ['11.8', '12.1']

     steps:
       - name: Checkout
         uses: actions/checkout@v3

-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
+      - name: Setup ccache
+        uses: hendrikmuhs/ccache-action@v1.2

-      - name: Set CUDA and PyTorch versions
-        run: |
-          echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
-          echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.torch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
-          echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
-
-      - name: Free up disk space
+      - name: Set up Linux Env
         if: ${{ runner.os == 'Linux' }}
-        # https://github.com/easimon/maximize-build-space/blob/master/action.yml
-        # https://github.com/easimon/maximize-build-space/tree/test-report
         run: |
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /opt/ghc
-          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          bash -x .github/scripts/env.sh

-      - name: Set up swap space
-        if: runner.os == 'Linux'
-        uses: pierotofy/set-swap-space@v1.0
+      - name: Set up Python
+        uses: actions/setup-python@v4
         with:
-          swap-size-gb: 10
+          python-version: ${{ matrix.python-version }}

       - name: Install CUDA ${{ matrix.cuda-version }}
-        if: ${{ matrix.cuda-version != 'cpu' }}
-        uses: Jimver/cuda-toolkit@v0.2.14
-        id: cuda-toolkit
-        with:
-          cuda: ${{ matrix.cuda-version }}
-          linux-local-args: '["--toolkit"]'
-          # default method is "local", and we're hitting some error with caching for CUDA 11.8 and 12.1
-          # method: ${{ (matrix.cuda-version == '11.8.0' || matrix.cuda-version == '12.1.0') && 'network' || 'local' }}
-          method: 'network'
-          # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
-          # not just nvcc
-          # sub-packages: '["nvcc"]'
-
-      - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
         run: |
-          pip install --upgrade pip
-          # If we don't install before installing Pytorch, we get error for torch 2.0.1
-          # ERROR: Could not find a version that satisfies the requirement setuptools>=40.8.0 (from versions: none)
-          pip install lit
-          # We want to figure out the CUDA version to download pytorch
-          # e.g. we can have system CUDA version being 11.7 but if torch==1.12 then we need to download the wheel from cu116
-          # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
-          # This code is ugly, maybe there's a better way to do this.
-          export TORCH_CUDA_VERSION=$(python -c "from os import environ as env; \
-            minv = {'1.12': 113, '1.13': 116, '2.0': 117, '2.1': 118, '2.2': 118, '2.3': 118}[env['MATRIX_TORCH_VERSION']]; \
-            maxv = {'1.12': 116, '1.13': 117, '2.0': 118, '2.1': 121, '2.2': 121, '2.3': 121}[env['MATRIX_TORCH_VERSION']]; \
-            print(max(min(int(env['MATRIX_CUDA_VERSION']), maxv), minv))" \
-          )
-          if [[ ${{ matrix.torch-version }} == *"dev"* ]]; then
-            if [[ ${MATRIX_TORCH_VERSION} == "2.2" ]]; then
-              # --no-deps because we can't install old versions of pytorch-triton
-              pip install typing-extensions jinja2
-              pip install --no-cache-dir --no-deps --pre https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}/torch-${{ matrix.torch-version }}%2Bcu${TORCH_CUDA_VERSION}-cp${MATRIX_PYTHON_VERSION}-cp${MATRIX_PYTHON_VERSION}-linux_x86_64.whl
-            else
-              pip install --no-cache-dir --pre torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/nightly/cu${TORCH_CUDA_VERSION}
-            fi
-          else
-            pip install --no-cache-dir torch==${{ matrix.torch-version }} --index-url https://download.pytorch.org/whl/cu${TORCH_CUDA_VERSION}
-          fi
-          nvcc --version
-          python --version
-          python -c "import torch; print('PyTorch:', torch.__version__)"
-          python -c "import torch; print('CUDA:', torch.version.cuda)"
-          python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)"
-        shell:
-          bash
-
-      - name: Build wheel
-        run: |
-          # We want setuptools >= 49.6.0 otherwise we can't compile the extension if system CUDA version is 11.7 and pytorch cuda version is 11.6
-          # https://github.com/pytorch/pytorch/blob/664058fa83f1d8eede5d66418abff6e20bd76ca8/torch/utils/cpp_extension.py#L810
-          # However this still fails so I'm using a newer version of setuptools
-          pip install setuptools==68.0.0
-          pip install ninja packaging wheel
-          export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH
-          export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
-          # Limit MAX_JOBS otherwise the github runner goes OOM
-          MAX_JOBS=2 FLASH_ATTENTION_FORCE_BUILD="TRUE" FLASH_ATTENTION_FORCE_CXX11_ABI=${{ matrix.cxx11_abi}} python setup.py bdist_wheel --dist-dir=dist
-          tmpname=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION}cxx11abi${{ matrix.cxx11_abi }}
-          wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2")
-          ls dist/*whl |xargs -I {} mv {} dist/${wheel_name}
-          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+          bash -x .github/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}

-      - name: Log Built Wheels
+      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
         run: |
-          ls dist
+          bash -x .github/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}

-      - name: Get the tag version
-        id: extract_branch
-        run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/}
-
-      - name: Get Release with tag
-        id: get_current_release
-        uses: joutvhu/get-release@v1
-        with:
-          tag_name: ${{ steps.extract_branch.outputs.branch }}
+      - name: Build wheel
+        shell: bash
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CMAKE_BUILD_TYPE: Release # do not compile with debug symbols to reduce wheel size
+        run: |
+          bash -x .github/scripts/build.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }}
+          wheel_name=$(ls dist/*whl | xargs -n 1 basename)
+          asset_name=${wheel_name//"linux"/"manylinux1"}
+          echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
+          echo "asset_name=${asset_name}" >> $GITHUB_ENV

       - name: Upload Release Asset
-        id: upload_release_asset
         uses: actions/upload-release-asset@v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         with:
-          upload_url: ${{ steps.get_current_release.outputs.upload_url }}
-          asset_path: ./dist/${{env.wheel_name}}
-          asset_name: ${{env.wheel_name}}
+          upload_url: ${{ needs.release.outputs.upload_url }}
+          asset_path: ./dist/${{ env.wheel_name }}
+          asset_name: ${{ env.asset_name }}
           asset_content_type: application/*
-
-  publish_package:
-    name: Publish package
-    needs: [build_wheels]
-
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: '3.10'
-
-      - name: Install dependencies
-        run: |
-          pip install ninja packaging setuptools wheel twine
-          # We don't want to download anything CUDA-related here
-          pip install torch --index-url https://download.pytorch.org/whl/cpu
-
-      - name: Build core package
-        env:
-          FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE"
-        run: |
-          python setup.py sdist --dist-dir=dist
-
-      - name: Deploy
-        env:
-          TWINE_USERNAME: "__token__"
-          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
-        run: |
-          python -m twine upload dist/*
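With the trigger changed from tag creation events to pushed `v*` tags, cutting a release comes down to pushing a version tag (the tag name below is illustrative):

```bash
# Illustrative: pushing a v* tag starts the Create Release workflow,
# which creates the GitHub release and uploads the built wheels as assets.
git tag v0.1.0
git push origin v0.1.0
```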