Skip to content

enable flash_attn_with_kvcache #27

enable flash_attn_with_kvcache

enable flash_attn_with_kvcache #27

Workflow file for this run

# Workflow that runs the kernel test jobs for the main_perf branch.
name: AMD Perf Kernel Tests

# Triggers: manual dispatch, plus pull requests, merge-queue check requests
# and pushes that target main_perf.
on:
  workflow_dispatch:
  pull_request:
    branches:
      - main_perf
  merge_group:
    branches:
      - main_perf
    types:
      - checks_requested
  push:
    branches:
      - main_perf

# Runs are grouped by git ref. An in-progress run is cancelled by a newer one
# on every ref except refs/heads/main_perf.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main_perf' }}

# The workflow token gets read-only access to all scopes.
permissions: read-all
jobs:
  # Picks the runner labels for the test job and publishes them as the JSON
  # job output `matrix-HIP`.
  Runner-Preparation-AMD:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    outputs:
      matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
    steps:
      - name: Prepare runner matrix
        id: set-matrix
        # Step outputs are written by appending `name=value` to the file named
        # by $GITHUB_OUTPUT; the `::set-output` workflow command is deprecated.
        # The ROCm/flash-attention repository gets the self-hosted "rocm"
        # runner; any other repository (e.g. a fork) gets ubuntu-latest.
        run: |
          if [ x"${{ github.repository }}" == x"ROCm/flash-attention" ]; then
            echo 'matrix-HIP=[["self-hosted", "rocm"]]' >> "$GITHUB_OUTPUT"
          else
            echo 'matrix-HIP=[["ubuntu-latest"]]' >> "$GITHUB_OUTPUT"
          fi

  # Builds the package inside the ROCm PyTorch container and runs the
  # flash-attention and AMD Triton kernel tests on each selected runner.
  Integration-Tests-AMD:
    needs: Runner-Preparation-AMD
    # Skip the job when the preparation job produced no runner matrix.
    if: needs.Runner-Preparation-AMD.outputs.matrix-HIP != ''
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
        # Each matrix entry is itself a list of runner labels.
        runner: ${{ fromJson(needs.Runner-Preparation-AMD.outputs.matrix-HIP) }}
    container:
      image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
      # Folded scalar: the lines below are joined with single spaces into one
      # option string passed to the container runtime.
      options: >-
        --device=/dev/kfd
        --device=/dev/dri
        --security-opt seccomp=unconfined
        --group-add video
        --user root
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Replaces any installed triton with an editable install built from the
      # default branch of the upstream repository, and installs the Python
      # packages the tests need.
      - name: Install Triton
        run: |
          pip uninstall -y triton
          pip install matplotlib pandas pytest
          git clone https://github.com/triton-lang/triton
          cd triton
          pip install --verbose -e python
          cd ..

      - name: Build
        run: |
          python setup.py install

      - name: Flash Attention Tests
        run: |
          pytest tests/test_flash_attn.py::test_flash_attn_kvcache
          pytest tests/test_flash_attn.py::test_flash_attn_output
          pytest tests/test_flash_attn.py::test_flash_attn_varlen_output

      - name: AMD Kernel Tests
        run: |
          pytest flash_attn/flash_attn_triton_kernel_prefill_amd.py
          pytest flash_attn/flash_attn_triton_kernel_decode_amd.py::test_op_fwd