# Add Ascend NPU support for nf4 quant (#1422) #952

name: Python package

# Triggers:
#   - every push;
#   - pull requests targeting `main` that touch one of the listed paths;
#   - published releases;
#   - manual dispatch from the Actions UI.
on:
  push: {}
  pull_request:
    branches: [main]
    paths:
      - ".github/workflows/python-package.yml"
      - "bitsandbytes/**"
      - "csrc/**"
      - "include/**"
      - "tests/**"
      - "CMakeLists.txt"
      - "requirements*.txt"
      - "setup.py"
      - "pyproject.toml"
      - "pytest.ini"
  release:
    types: [published]
  workflow_dispatch: {}  # Allow manual trigger

# Runs are grouped per workflow and per pull request number (falling back to
# the git ref when the event is not a pull request); starting a new run in a
# group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  ##
  # This job matrix builds the non-CUDA versions of the libraries for all supported platforms.
  ##
  build-shared-libs:
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        arch: [x86_64, aarch64]
        exclude:
          - os: windows-latest  # This probably requires arm64 Windows agents
            arch: aarch64
          - os: ubuntu-latest  # Temporary. Takes too long, not ready yet.
            arch: aarch64
    runs-on: ${{ matrix.os }}  # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      - uses: actions/checkout@v4
      - name: Setup MSVC
        if: startsWith(matrix.os, 'windows')
        # NOTE(review): this reference was mangled to "[email protected]" in the
        # captured copy; the version tag is reconstructed - confirm the pin.
        uses: ilammy/msvc-dev-cmd@v1.13.0  # to use cl
      - name: Build C++
        run: bash .github/scripts/build-cpu.sh
        env:
          build_os: ${{ matrix.os }}
          build_arch: ${{ matrix.arch }}
      - name: Upload build artifact
        uses: actions/upload-artifact@v4
        with:
          name: shared_library_${{ matrix.os }}_${{ matrix.arch }}
          path: output/*
          retention-days: 7

  ##
  # This job matrix builds the CUDA versions of the libraries for platforms that support CUDA (Linux x64/aarch64 + Windows x64)
  ##
  build-shared-libs-cuda:
    # Skipped entirely when the workflow runs for the multi-backend-refactor ref.
    if: github.ref_name != 'multi-backend-refactor'
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        arch: [x86_64, aarch64]
        cuda_version:
          ["11.7.1", "11.8.0", "12.0.1", "12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.0"]
        exclude:
          - os: windows-latest  # This probably requires arm64 Windows agents
            arch: aarch64
          - os: ubuntu-latest  # Temporary. Takes too long, not ready yet.
            arch: aarch64
    runs-on: ${{ matrix.os }}  # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      - uses: actions/checkout@v4
      # Linux: We use Docker to build cross platform Cuda (aarch64 is built in emulation)
      - name: Set up Docker multiarch
        if: startsWith(matrix.os, 'ubuntu')
        uses: docker/setup-qemu-action@v2
      # Windows: We install Cuda on the agent (slow)
      # NOTE(review): this reference was mangled to "[email protected]" in the
      # captured copy; the version tag is reconstructed - confirm that the
      # pinned release supports every entry in `cuda_version`.
      - uses: Jimver/cuda-toolkit@v0.2.16
        if: startsWith(matrix.os, 'windows')
        id: cuda-toolkit
        with:
          cuda: ${{ matrix.cuda_version }}
          method: "network"
          sub-packages: '["nvcc","cudart","cusparse","cublas","thrust","nvrtc_dev","cublas_dev","cusparse_dev"]'
          linux-local-args: '["--toolkit"]'
          use-github-cache: false
      - name: Setup MSVC
        if: startsWith(matrix.os, 'windows')
        # NOTE(review): reference reconstructed from a mangled "[email protected]" - confirm the pin.
        uses: ilammy/msvc-dev-cmd@v1.13.0  # to use cl
      - name: Build C++
        run: bash .github/scripts/build-cuda.sh
        env:
          build_os: ${{ matrix.os }}
          build_arch: ${{ matrix.arch }}
          cuda_version: ${{ matrix.cuda_version }}
      - name: Upload build artifact
        uses: actions/upload-artifact@v4
        with:
          name: shared_library_cuda_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.cuda_version }}
          path: output/*
          retention-days: 7

  ##
  # This job matrix runs the ROCm build script once per listed ROCm version (ubuntu-latest, x86_64 only).
  ##
  build-shared-libs-rocm:
    strategy:
      matrix:
        os: [ubuntu-latest]
        arch: [x86_64]
        rocm_version:
          ["6.1.2", "6.2"]
    runs-on: ${{ matrix.os }}  # One day, we could run them on native agents. Azure supports this now but it's planned only for Q3 2023 for hosted agents
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker multiarch
        if: startsWith(matrix.os, 'ubuntu')
        uses: docker/setup-qemu-action@v2
      # Deletes preinstalled toolchains and SDKs from the runner.
      # NOTE(review): presumably needed so the ROCm build fits on the hosted
      # runner's disk - confirm before trimming this list.
      - name: Clean up disk space
        run: |
          sudo rm -rf \
            /usr/share/dotnet \
            /opt/ghc \
            "/usr/local/share/boost" \
            "$AGENT_TOOLSDIRECTORY" \
            /opt/hostedtoolcache \
            /opt/google/chrome \
            /opt/microsoft/msedge \
            /opt/microsoft/powershell \
            /opt/pipx \
            /usr/lib/mono \
            /usr/local/julia* \
            /usr/local/lib/android \
            /usr/local/lib/node_modules \
            /usr/local/share/chromium \
            /usr/local/share/powershell \
            /usr/share/swift
      - name: Build C++
        run: bash .github/scripts/build-rocm.sh
        env:
          build_os: ${{ matrix.os }}
          build_arch: ${{ matrix.arch }}
          rocm_version: ${{ matrix.rocm_version }}
      - name: Upload build artifact
        uses: actions/upload-artifact@v4
        with:
          name: shared_library_rocm_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.rocm_version }}
          path: output/*
          retention-days: 7

  ##
  # This job matrix downloads the shared-library artifacts for its os/arch, copies them into
  # the package directory, then builds an sdist and a platform-tagged wheel.
  ##
  build-wheels:
    needs:
      - build-shared-libs
      # - build-shared-libs-cuda reduce the pkg size + build times for the preview release
      - build-shared-libs-rocm
    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        # The specific Python version is irrelevant in this context as we are only packaging non-C extension
        # code. This ensures compatibility across Python versions, including Python 3.8, as compatibility is
        # dictated by the packaged code itself, not the Python version used for packaging.
        python-version: ["3.10"]
        arch: [x86_64, aarch64]
        exclude:
          - os: windows-latest  # This probably requires arm64 Windows agents
            arch: aarch64
          - os: ubuntu-latest  # Temporary. Takes too long, not ready yet.
            arch: aarch64
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1  # shallow clone
      - name: Fetch tags for dynamic versioning in setup.py
        run: |
          git fetch --depth=1 origin --tags
          echo "Available Git tags:"
          git tag -n
      - name: Download build artifact
        uses: actions/download-artifact@v4
        with:
          merge-multiple: true
          pattern: "shared_library*_${{ matrix.os }}_${{ matrix.arch }}*"
          path: output/
      - name: Copy correct platform shared library
        shell: bash
        run: |
          ls -lR output/
          cp output/${{ matrix.os }}/${{ matrix.arch }}/* bitsandbytes/
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - run: pip install build wheel
      # for now need to do the below instead of prior `python -m build .`, which didn't allow us to access git tags
      - run: python -m build --sdist && python -m build --wheel
      - name: Determine and Set Platform Tag, then Tag Wheel
        shell: bash
        run: |
          PLATFORM_TAG=$(python .github/scripts/set_platform_tag.py "${{ matrix.arch }}")
          echo "PLATFORM_TAG=$PLATFORM_TAG"
          wheel tags --remove --abi-tag=none --python-tag=py3 --platform-tag="$PLATFORM_TAG" dist/bitsandbytes-*.whl
      - name: Upload build artifact
        uses: actions/upload-artifact@v4
        with:
          name: bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}
          path: dist/bitsandbytes-*.whl
          retention-days: 7

  ##
  # This job runs only for the multi-backend-refactor ref: it collects every built wheel, strips the
  # git-hash suffix from the file names, and uploads them to the `continuous-release_multi-backend-refactor`
  # pre-release.
  ##
  upload-pre-release-wheels:
    name: Create release and upload artifacts
    runs-on: ubuntu-latest
    if: github.ref_name == 'multi-backend-refactor'
    permissions:
      contents: write
    needs:
      - build-wheels
    steps:
      - name: Download and rename artifacts
        uses: actions/download-artifact@v4
        with:
          path: tmp/
          pattern: "bdist_wheel_*"
          merge-multiple: true
      - name: Inspect tmp directory after downloading artifacts
        run: ls -alFR tmp/
      - name: Move and rename wheel files with pattern replacement
        run: |
          mkdir -p wheels/
          find tmp/ -type f -name '*.whl' -print0 | while IFS= read -r -d '' wheel; do
            wheel_filename=$(basename "$wheel")
            # Remove the git hash, e.g. `+1234567`, for a stable download link on the multi-backend pre-release
            cleaned_filename=$(echo "$wheel_filename" | sed -E 's/\+[0-9a-f]{7}-/-/g')
            mv "$wheel" "wheels/$cleaned_filename"
          done
      - name: Inspect wheels directory after renaming files
        run: ls -alFR wheels/
      - name: Create release and upload artifacts
        # NOTE(review): this reference was mangled to "[email protected]" in the
        # captured copy; the version tag is reconstructed - confirm the pin.
        uses: softprops/action-gh-release@v2.0.8
        with:
          files: wheels/*.whl
          prerelease: true
          name: Multi-Backend Preview
          tag_name: continuous-release_multi-backend-refactor
          make_latest: false
          draft: false
          target_commitish: ${{ github.sha }}

  ##
  # This job runs the repository's auditwheel_show.py script over every built wheel and
  # writes its output to the job's step summary.
  ##
  audit-wheels:
    needs: build-wheels
    runs-on: ubuntu-latest
    env:
      PIP_DISABLE_PIP_VERSION_CHECK: 1
    steps:
      - uses: actions/checkout@v4
      - name: Download all wheels
        uses: actions/download-artifact@v4
        with:
          merge-multiple: true
          pattern: "bdist_wheel_*"
          path: wheels/
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - run: pip install auditwheel
      - run: python ./.github/scripts/auditwheel_show.py wheels/* | tee "$GITHUB_STEP_SUMMARY"

  # Disabled test job, kept for reference.
  # test:
  #   needs:
  #     - build-wheels
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       include:
  #         - os: ubuntu-latest
  #           arch: x86_64
  #           python-version: "3.8"
  #         - os: windows-latest
  #           arch: x86_64
  #           python-version: "3.8"
  #   runs-on: ${{ matrix.os }}
  #   steps:
  #     - uses: actions/checkout@v4
  #     - uses: actions/download-artifact@v4
  #       with:
  #         merge-multiple: true
  #         pattern: "bdist_wheel_${{ matrix.os }}_${{ matrix.arch }}*"
  #         path: wheel/
  #     - uses: actions/setup-python@v5
  #       with:
  #         python-version: ${{ matrix.python-version }}
  #         cache: pip
  #     - shell: bash
  #       run: ls -lar wheel/
  #     - run: pip install wheel/*.whl -r requirements-ci.txt
  #     - run: pytest --log-cli-level=DEBUG --continue-on-collection-errors tests