# port perf-bwd-hip branch to rocm6.2 #236
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds the FBGEMM_GPU wheel for several Python versions on CPU hosts, then
# installs and tests each wheel on a GPU host and uploads it to PyPI.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Push Binary Release

# NOTE(review): every trigger below is commented out, so `on:` is empty and
# nothing can start this workflow. GitHub expects at least one event here;
# uncomment one of the entries to re-enable it - confirm this is intentional.
on:
  # # For debugging, enable push/pull_request
  # [push, pull_request]
  # # run every day at 10:45 AM
  # schedule:
  #   - cron: '45 10 * * *'
  # # or manually trigger it
  # workflow_dispatch:
jobs:
  # Build on CPU hosts and upload to GHA: one wheel per matrix entry, stored
  # as a workflow artifact named after the Python version and CUDA tag.
  build_on_cpu:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:
          # python-version is quoted so YAML keeps it a string; an unquoted
          # value such as 3.10 would be parsed as the float 3.1.
          - os: linux.2xlarge
            python-version: "3.7"
            python-tag: "py37"
            cuda-tag: "cu11"
          - os: linux.2xlarge
            python-version: "3.8"
            python-tag: "py38"
            cuda-tag: "cu11"
          - os: linux.2xlarge
            python-version: "3.9"
            python-tag: "py39"
            cuda-tag: "cu11"
    steps:
      - name: Check ldd --version
        run: ldd --version
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: true
      # Update references
      - name: Git Submodule Update
        run: |
          cd fbgemm_gpu/
          git submodule sync
          git submodule update --init --recursive
      - name: Update pip
        run: |
          sudo yum update -y
          sudo yum -y install git python3-pip
          sudo pip3 install --upgrade pip
      - name: Setup conda
        run: |
          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
          bash ~/miniconda.sh -b -p $HOME/miniconda -u
      - name: Setup PATH with conda
        run: |
          # Use the same $HOME-based prefix the installer step wrote to.
          echo "$HOME/miniconda/bin" >> "$GITHUB_PATH"
          # KEY=VALUE pairs belong in GITHUB_ENV; appending them to
          # GITHUB_PATH only adds a bogus PATH entry.
          echo "CONDA=$HOME/miniconda" >> "$GITHUB_ENV"
      - name: Create conda env
        run: |
          # -y makes the non-interactive confirmation explicit.
          conda create -y --name build_binary python=${{ matrix.python-version }}
          conda info
      - name: check python version
        run: |
          conda run -n build_binary python --version
      - name: Install CUDA 11.3
        shell: bash
        run: |
          sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
          sudo yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
          sudo yum clean expire-cache
          sudo yum install -y nvidia-driver-latest-dkms
          sudo yum install -y cuda-11-3
          sudo yum install -y cuda-drivers
          sudo yum install -y libcudnn8-devel
      - name: setup Path
        run: |
          echo /usr/local/cuda-11.3/bin >> "$GITHUB_PATH"
          echo /usr/local/bin >> "$GITHUB_PATH"
      - name: nvcc check
        run: |
          nvcc --version
      - name: Install PyTorch
        shell: bash
        run: |
          conda run -n build_binary \
            python -m pip install --pre torch -f https://download.pytorch.org/whl/test/cu113/torch_test.html
      - name: Install Dependencies
        shell: bash
        run: |
          cd fbgemm_gpu/
          conda run -n build_binary python -m pip install -r requirements.txt
      - name: Test Installation of dependencies
        run: |
          cd fbgemm_gpu/
          conda run -n build_binary python -c "import torch.distributed"
          echo "torch.distributed succeeded"
          conda run -n build_binary python -c "import skbuild"
          echo "skbuild succeeded"
          conda run -n build_binary python -c "import numpy"
          echo "numpy succeeded"
      # for the conda run with quotes, we have to use "\" and double quotes
      # here is the issue: https://github.com/conda/conda/issues/10972
      - name: Build FBGEMM_GPU Release
        run: |
          cd fbgemm_gpu/
          rm -r dist || true
          # build cuda7.0;8.0 for v100/a100 arch:
          # Couldn't build more cuda arch due to 100 MB binary size limit from
          # pypi website.
          # manylinux1_x86_64 is specified for pypi upload:
          # distribute python extensions as wheels on Linux
          conda run -n build_binary \
            python setup.py bdist_wheel \
            --package_name=fbgemm_gpu \
            --python-tag=${{ matrix.python-tag }} \
            -DTORCH_CUDA_ARCH_LIST="'7.0;8.0'" \
            --plat-name=manylinux1_x86_64
          ls -lt dist/*.whl
      - name: Upload wheel as GHA artifact
        uses: actions/upload-artifact@v2
        with:
          name: fbgemm_gpu_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
          path: fbgemm_gpu/dist/fbgemm_gpu-*.whl
# download from GHA, test on gpu and push to pypi
  # (job entry under `jobs:`) Runs once per Python version after build_on_cpu:
  # downloads the matching wheel artifact, installs and imports it on a GPU
  # host, runs pytest, then uploads the wheel to PyPI.
  test_on_gpu:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [linux.4xlarge.nvidia.gpu]
        # Quoted so YAML keeps the versions as strings (3.10 would otherwise
        # be read as the float 3.1).
        python-version: ["3.7", "3.8", "3.9"]
        cuda-tag: ["cu11"]
    needs: build_on_cpu
    steps:
      - name: Check ldd --version
        run: ldd --version
      - name: check cpu info
        shell: bash
        run: |
          cat /proc/cpuinfo
      - name: check distribution info
        shell: bash
        run: |
          cat /proc/version
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: check gpu info
        shell: bash
        run: |
          sudo yum install lshw -y
          sudo lshw -C display
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: true
      # Update references
      - name: Git Submodule Update
        run: |
          cd fbgemm_gpu/
          git submodule sync
          git submodule update --init --recursive
          git log
      - name: Update pip
        run: |
          sudo yum update -y
          sudo yum -y install git python3-pip
          sudo pip3 install --upgrade pip
      - name: Setup conda
        run: |
          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
          bash ~/miniconda.sh -b -p $HOME/miniconda -u
      - name: Setup PATH with conda
        run: |
          # Use the same $HOME-based prefix the installer step wrote to.
          echo "$HOME/miniconda/bin" >> "$GITHUB_PATH"
          # KEY=VALUE pairs belong in GITHUB_ENV; appending them to
          # GITHUB_PATH only adds a bogus PATH entry.
          echo "CONDA=$HOME/miniconda" >> "$GITHUB_ENV"
      - name: create conda env
        run: |
          # -y makes the non-interactive confirmation explicit.
          conda create -y --name build_binary python=${{ matrix.python-version }}
          conda info
      - name: check python version no Conda
        run: |
          python --version
      - name: check python version
        run: |
          conda run -n build_binary python --version
      - name: Install CUDA 11.3
        shell: bash
        run: |
          sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
          sudo yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
          sudo yum clean expire-cache
          sudo yum install -y nvidia-driver-latest-dkms
          sudo yum install -y cuda-11-3
          sudo yum install -y cuda-drivers
          sudo yum install -y libcudnn8-devel
      - name: setup Path
        run: |
          echo /usr/local/cuda-11.3/bin >> "$GITHUB_PATH"
          echo /usr/local/bin >> "$GITHUB_PATH"
      - name: nvcc check
        run: |
          nvcc --version
      - name: Install PyTorch
        shell: bash
        run: |
          conda run -n build_binary \
            python -m pip install --pre torch -f https://download.pytorch.org/whl/test/cu113/torch_test.html
      # download wheel from GHA
      - name: Download wheel
        uses: actions/download-artifact@v2
        with:
          name: fbgemm_gpu_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
      - name: Display structure of downloaded files
        run: ls -R
      - name: Install Dependencies
        shell: bash
        run: |
          cd fbgemm_gpu/
          conda run -n build_binary python -m pip install -r requirements.txt
      - name: Test Installation of dependencies
        run: |
          cd fbgemm_gpu/
          conda run -n build_binary python -c "import torch.distributed"
          echo "torch.distributed succeeded"
          conda run -n build_binary python -c "import skbuild"
          echo "skbuild succeeded"
          conda run -n build_binary python -c "import numpy"
          echo "numpy succeeded"
      - name: Install FBGEMM_GPU Release
        run: |
          rm -r dist || true
          conda run -n build_binary \
            python -m pip install *.whl
      - name: Test fbgemm_gpu installation
        shell: bash
        run: |
          conda run -n build_binary \
            python -c "import fbgemm_gpu"
      - name: Test with pytest
        # remove this line when we fixed all the unit tests
        # NOTE(review): while this is set, a failing or timed-out test run
        # does not stop the PyPI upload step below.
        continue-on-error: true
        run: |
          conda run -n build_binary \
            python -m pip install pytest
          # The tests with single CPU core on a less powerful testing GPU in GHA
          # can take 5 hours.
          timeout 600s conda run -n build_binary \
            python -m pytest -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors
      # Push to Pypi
      - name: Push FBGEMM_GPU Binary to PYPI
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
        run: |
          conda run -n build_binary python -m pip install twine
          # Official PYPI website
          conda run -n build_binary \
            python -m twine upload \
            --username __token__ \
            --password "$PYPI_TOKEN" \
            --skip-existing \
            --verbose \
            fbgemm_gpu-*.whl