reenable gfx1100 ci #475
name: AMD Perf Kernel Tests

on:
  workflow_dispatch:
  pull_request:
    branches: [main_perf]
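
# Cancel any in-progress run for the same ref when a new one starts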
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true

permissions: read-all

jobs:
  Integration-Tests-AMD:
    runs-on: ${{ matrix.runner }}
    strategy:
      matrix:
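        # Runner labels: linux-mi300-gpu-1 targets CDNA (MI300) hardware, gfx1100 targets RDNA3 hardware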
        runner: [linux-mi300-gpu-1, gfx1100]
      fail-fast: false # don't cancel the remaining matrix entries when one fails
    container:
      image: rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0
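      # /dev/kfd is the ROCm compute interface and /dev/dri holds the GPU render nodes; both are needed for GPU access inside the container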
      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Show Device Info
        run: |
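          # Print the GPU architecture targets (e.g. gfx942, gfx1100) to confirm which hardware this matrix entry landed on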
          rocminfo | grep gfx
      - name: Uninstall Triton
        run: |
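          # Remove any preinstalled Triton plus its kernel cache so the pinned source build below is the one under test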
          pip uninstall -y triton
          rm -rf ~/.triton
          rm -rf ./triton/python/build
      - name: Install Triton
        run: |
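          # Build Triton from source at a pinned commit so kernel behavior is reproducible across CI runs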
          git clone https://github.com/triton-lang/triton
          cd triton
          git checkout 3ca2f498e98ed7249b82722587c511a5610e00c4
          pip install ninja cmake wheel pybind11 # build-time dependencies
          pip install matplotlib pandas pytest # triton bench dependencies
          pip install --verbose --no-build-isolation ./python
          cd ..
      - name: Show Triton version
        run: |
          pip show triton
      - name: Build
        run: |
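          # FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE" switches flash-attention to its Triton-based AMD backend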
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          python setup.py install
      - name: Flash Attention Tests using PyTorch reference implementation
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
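          # FLASH_ATTENTION_TRITON_AMD_REF=1 runs the tests against the PyTorch reference implementation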
          FLASH_ATTENTION_TRITON_AMD_REF=1 pytest tests/test_flash_attn_triton_amd.py
      # CDNA Tests
      - name: Flash Attention CDNA Tests
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          pytest tests/test_flash_attn_triton_amd.py
      # FIXME: run the full suite
      - name: AMD Tests
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
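          # Only the fp8 prefill tests run here for now; see the FIXME above about enabling the full suite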
          pytest -v -s flash_attn/flash_attn_triton_amd/test.py::test_op_prefill_fp8 flash_attn/flash_attn_triton_amd/test.py::test_op_prefill_varlen_fp8
      - name: AMD Bench
        if: matrix.runner == 'linux-mi300-gpu-1'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
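          # FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=1 presumably enables kernel autotuning for the benchmark (inferred from the variable name)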
          FLASH_ATTENTION_TRITON_AMD_AUTOTUNE=1 python flash_attn/flash_attn_triton_amd/bench.py
      # RDNA Tests
      - name: Flash Attention RDNA Tests
        if: matrix.runner == 'gfx1100'
        run: |
          export FLASH_ATTENTION_TRITON_AMD_ENABLE="TRUE"
          # NOTE: the full suite exceeds 6 hours on gfx1100, so sample a subset of the tests here; the full suite runs on a CDNA machine
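          # The fixed seed keeps the 10% sample deterministic across runs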
          pytest --randomly-seed=42 --randomly-sample=0.10 tests/test_flash_attn_triton_amd.py