From 49f7356a84db79fe391f848058440efe2f3f7ec7 Mon Sep 17 00:00:00 2001 From: Won-Kyu Park Date: Tue, 6 Feb 2024 04:28:52 +0900 Subject: [PATCH] revert already merged stuff, accidentally reverted by PR #949 without reason. --- .github/workflows/python-package.yml | 264 +++++++++++++++++---------- 1 file changed, 171 insertions(+), 93 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 265128637..b502336b1 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -1,9 +1,9 @@ name: Python package on: - push: {} + push: + branches: [ "main" ] pull_request: - branches: [ main ] paths: - '.github/workflows/python-package.yml' - 'bitsandbytes/**' @@ -17,8 +17,13 @@ on: - 'pytest.ini' - '**/*.md' release: + branches: [ "main" ] types: [ published ] +concurrency: + group: cmake-${{ github.ref }} + cancel-in-progress: true + jobs: ## @@ -26,9 +31,13 @@ jobs: ## build-shared-libs: strategy: + # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable. 
+ fail-fast: false + matrix: os: [ubuntu-latest, macos-latest, windows-latest] arch: [x86_64, aarch64] + build_type: [Release] exclude: - os: windows-latest # This probably requires arm64 Windows agents arch: aarch64 @@ -36,123 +45,193 @@ jobs: steps: # Check out code - uses: actions/checkout@v4 - # On Linux we use CMake within Docker - - name: Setup cmake - uses: jwlawson/actions-setup-cmake@v1.14 - with: - cmake-version: '3.26.x' - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.1 - if: ${{ startsWith(matrix.os, 'windows') }} - # Check out dependencies code - - uses: actions/checkout@v4 - name: Check out NVidia cub + + - name: Set up MSVC + if: matrix.os == 'windows-latest' + uses: ilammy/msvc-dev-cmd@v1.13.0 with: - repository: nvidia/cub - ref: 1.11.0 - path: dependencies/cub - # Compile C++ code - - name: Build C++ + arch: amd64 + + - name: Set reusable strings + # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. + id: strings shell: bash run: | - set -ex - build_os=${{ matrix.os }} - build_arch=${{ matrix.arch }} - if [ ${build_os:0:6} == ubuntu -a ${build_arch} == aarch64 ]; then - # Allow cross-compile om aarch64 - sudo apt-get install -y gcc-aarch64-linux-gnu binutils-aarch64-linux-gnu - fi - if [ ${build_os:0:5} == macos -a ${build_arch} == aarch64 ]; then - cmake -DCMAKE_OSX_ARCHITECTURES=arm64 -DCOMPUTE_BACKEND=cpu . - else - cmake -DCOMPUTE_BACKEND=cpu . 
- fi - if [ ${build_os:0:7} == windows ]; then - pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release" + echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + + - name: Prep build + run: python3 -m pip install cmake==3.27.9 ninja setuptools wheel + + - name: Prep Compilers + shell: bash -el {0} + run: | + if [ "${{ matrix.os }}" = "windows-latest" ]; then + echo CXX_COMPILER=cl >> "$GITHUB_ENV" else - make + echo CXX_COMPILER=g++ >> "$GITHUB_ENV" fi + + + - name: Configure CPU + run: > + cmake -B ${{ steps.strings.outputs.build-output-dir }} + -G Ninja + -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCOMPUTE_BACKEND=cpu + -S ${{ github.workspace }} + + - name: Build CPU + run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} + + - name: Copy libraries + shell: bash + run: | mkdir -p output/${{ matrix.os }}/${{ matrix.arch }} ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ ) - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_${{ matrix.os }}_${{ matrix.arch }} + name: shared_library-${{ matrix.os }}-${{ matrix.arch }} path: output/* - retention-days: 7 ## # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64) ## build-shared-libs-cuda: strategy: + # Set fail-fast to false to ensure that feedback is delivered for all matrix combinations. Consider changing this to true when your workflow is stable. + fail-fast: false + matrix: os: [ubuntu-latest, windows-latest] + cuda-version: ['11.8', '12.1'] arch: [x86_64, aarch64] - cuda_version: ['12.1.0'] + build_type: [Release] exclude: - os: windows-latest # This probably requires arm64 Windows agents arch: aarch64 + runs-on: ${{ matrix.os }} # One day, we could run them on native agents. 
Azure supports this now but it's planned only for Q3 2023 for hosted agents steps: # Check out code - uses: actions/checkout@v4 - # Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation) - - name: Set up Docker multiarch - if: startsWith(matrix.os, 'ubuntu') - uses: docker/setup-qemu-action@v2 - # On Linux we use CMake within Docker - - name: Setup cmake - if: ${{ !startsWith(matrix.os, 'linux') }} - uses: jwlawson/actions-setup-cmake@v1.14 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 with: - cmake-version: '3.26.x' - # Windows: We install Cuda on the agent (slow) - - uses: Jimver/cuda-toolkit@v0.2.14 - if: startsWith(matrix.os, 'windows') - id: cuda-toolkit + python-version: "3.10" + + - name: Set up MSVC + if: matrix.os == 'windows-latest' + uses: ilammy/msvc-dev-cmd@v1.13.0 with: - cuda: ${{ matrix.cuda_version }} - method: 'local' - # sub-packages: '["nvcc","cudart","nvrtc_dev","cublas_dev","cusparse_dev","visual_studio_integration"]' - - name: Add msbuild to PATH - uses: microsoft/setup-msbuild@v1.1 - if: ${{ startsWith(matrix.os, 'windows') }} - # Check out dependencies code - - uses: actions/checkout@v4 - name: Check out NVidia cub + arch: amd64 + + - name: Setup Mambaforge + uses: conda-incubator/setup-miniconda@v3.0.1 with: - repository: nvidia/cub - ref: 1.11.0 - path: dependencies/cub - # Compile C++ code - - name: Build C++ + miniforge-variant: Mambaforge + miniforge-version: latest + activate-environment: bnb-env + use-mamba: true + + - uses: conda-incubator/setup-miniconda@v3.0.1 + with: + auto-update-conda: true + activate-environment: bnb-env + environment-file: environment-bnb.yml + use-only-tar-bz2: false + auto-activate-base: true + python-version: "3.10" + mamba-version: "*" + + - name: Set reusable strings + # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file. 
+ id: strings + shell: bash + run: | + echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT" + + - name: CUDA Toolkit + shell: bash -el {0} + run: | + if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then + # to prepare space + sudo rm -rf /usr/share/dotnet + sudo rm -rf /opt/ghc + sudo rm -rf /usr/local/share/boost + fi + addon="" + cuda_version=${{ matrix.cuda-version }} + [ "$cuda_version" = "12.1" ] && [ "${{ matrix.os }}" = "ubuntu-latest" ] && addon="cuda-cudart-static cuda-nvrtc" + [ "$cuda_version" = "12.1" ] && [ "${{ matrix.os }}" = "windows-latest" ] && addon="cuda-nvrtc" + [ "$cuda_version" = "11.8" ] && cuda_version="11.8.0" + [ "$cuda_version" = "12.1" ] && cuda_version="12.1.1" + + conda install pytorch-cuda=${{ matrix.cuda-version }} -c pytorch # its dependencies are sometimes not resolved correctly + conda install cuda-python=${{ matrix.cuda-version }} cuda-libraries-dev cuda-nvcc cuda-nvtx cuda-cupti cuda-cudart cuda-cudart-dev cuda-runtime cuda-libraries $addon -c "nvidia/label/cuda-$cuda_version" + + [ "${{ matrix.os }}" = "windows-latest" ] && conda install "clang>=17.0.6" "clangxx>=17.0.6" -c conda-forge + + CUDA_HOME="${{ env.CONDA }}/envs/bnb-env" + echo CUDA_HOME=$CUDA_HOME >> "$GITHUB_ENV" + echo CUDA_PATH=$CUDA_HOME >> "$GITHUB_ENV" + + if [ "${{ matrix.os }}" = "windows-latest" ]; then + echo CXX_COMPILER=cl >> "$GITHUB_ENV" + echo C_COMPILER=cl >> "$GITHUB_ENV" + # without -DCMAKE_CUDA_COMPILER=nvcc, the cmake configure step always fails for cuda-11.8 + echo DCMAKE_CUDA_COMPILER=-DCMAKE_CUDA_COMPILER=nvcc >> "$GITHUB_ENV" + else + echo CXX_COMPILER=g++ >> "$GITHUB_ENV" + echo C_COMPILER=gcc >> "$GITHUB_ENV" + fi + + nvcc --version + + - name: Update environment + run: mamba env update -n bnb-env -f environment-bnb.yml + + - name: Prep build + run: python -m pip install cmake==3.27.9 ninja setuptools wheel + + # TODO: the following steps (CUDA, NOBLASLT, CPU) could be moved to the matrix, so they're built in parallel + + - name: Configure 
CUDA + run: > + cmake -B ${{ steps.strings.outputs.build-output-dir }} + -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }} + -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90" + -DCOMPUTE_BACKEND=cuda + -S ${{ github.workspace }} + + - name: Build CUDA + run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} + + - name: Configure NOBLASLT + run: > + cmake -B ${{ steps.strings.outputs.build-output-dir }} + -G Ninja ${{ env.DCMAKE_CUDA_COMPILER }} + -DCMAKE_CXX_COMPILER=${{ env.CXX_COMPILER }} + -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DCOMPUTE_CAPABILITY="50;52;60;61;62;70;72;75;80;86;87;89;90" + -DNO_CUBLASLT=ON + -S ${{ github.workspace }} + + - name: Build NOBLASLT + run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config ${{ matrix.build_type }} + + - name: Copy libraries shell: bash run: | - set -ex - build_os=${{ matrix.os }} - build_arch=${{ matrix.arch }} - for NO_CUBLASLT in ON OFF; do - if [ ${build_os:0:6} == ubuntu ]; then - image=nvidia/cuda:${{ matrix.cuda_version }}-devel-ubuntu22.04 - echo "Using image $image" - docker run --platform linux/$build_arch -i -w /src -v $PWD:/src $image sh -c \ - "apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends cmake \ - && cmake -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} . \ - && make" - else - cmake -DCOMPUTE_BACKEND=cuda -DNO_CUBLASLT=${NO_CUBLASLT} . 
- pwsh -Command "msbuild bitsandbytes.vcxproj /property:Configuration=Release" - fi - done mkdir -p output/${{ matrix.os }}/${{ matrix.arch }} ( shopt -s nullglob && cp bitsandbytes/*.{so,dylib,dll} output/${{ matrix.os }}/${{ matrix.arch }}/ ) - name: Upload build artifact uses: actions/upload-artifact@v4 with: - name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }} + name: shared_library_cuda-${{ matrix.os }}-${{ matrix.cuda-version }}-${{ matrix.arch }} path: output/* - retention-days: 7 build-wheels: needs: - build-shared-libs @@ -160,10 +239,9 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.9", "3.10", "3.11", "3.12"] arch: [x86_64, aarch64] exclude: - - os: windows-latest # This probably requires arm64 Windows agents + - os: windows-latest arch: aarch64 runs-on: ${{ matrix.os }} steps: @@ -174,7 +252,7 @@ jobs: uses: actions/download-artifact@v4 with: merge-multiple: true - pattern: "shared_library*_${{ matrix.os }}_${{ matrix.arch }}*" + pattern: "shared_library*-${{ matrix.os }}-*" path: output/ - name: Copy correct platform shared library shell: bash @@ -182,10 +260,10 @@ jobs: ls -lR output/ cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/ # Set up the Python version needed - - name: Set up Python ${{ matrix.python-version }} + - name: Set up Python 3.10 uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: "3.10" cache: pip - name: Install build package shell: bash @@ -200,13 +278,13 @@ jobs: # PYTHONPATH=. pytest --log-cli-level=DEBUG tests - name: Build wheel shell: bash - run: python -m build . - - name: Upload build artifact + run: python -m build . 
--wheel + - name: Upload Build Artifacts uses: actions/upload-artifact@v4 with: - name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.python-version }} - path: dist/bitsandbytes-*.whl - retention-days: 7 + name: bdist_wheel-${{ matrix.os }}-${{ matrix.arch }} + path: | + ${{ github.workspace }}/dist/ publish: needs: build-wheels runs-on: ubuntu-latest @@ -217,7 +295,7 @@ jobs: with: path: dist/ merge-multiple: true - pattern: "bdist_wheel_*" + pattern: "bdist_wheel-*" - run: | ls -lR dist/ - name: Publish to PyPi