.github/workflows/fbgemm_release_build.yml

port perf-bwd-hip branch to rocm6.2 #236

Workflow file for this run

.github/workflows/fbgemm_release_build.yml at 544a7bd

	# Builds FBGEMM_GPU release wheels on CPU hosts, then installs and tests them
	# on GPU hosts and uploads them to PyPI.
	# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

	name: Push Binary Release

	on:
	  # # For debugging, enable push/pull_request
	  # [push, pull_request]
	  # # run every day at 10:45 AM
	  # schedule:
	  #   - cron: '45 10 * * *'
	  # An `on:` key without any event makes the whole workflow file invalid,
	  # so the manual trigger stays enabled; the release only runs when someone
	  # starts it explicitly.
	  workflow_dispatch:

	jobs:
	  # build on cpu hosts and upload to GHA
	  build_on_cpu:
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        include:
	          # python-version is quoted so it stays a string; an unquoted
	          # value such as 3.10 would be parsed as the float 3.1.
	          - os: linux.2xlarge
	            python-version: "3.7"
	            python-tag: "py37"
	            cuda-tag: "cu11"
	          - os: linux.2xlarge
	            python-version: "3.8"
	            python-tag: "py38"
	            cuda-tag: "cu11"
	          - os: linux.2xlarge
	            python-version: "3.9"
	            python-tag: "py39"
	            cuda-tag: "cu11"
	    steps:
	      - name: Check ldd --version
	        run: ldd --version
	      # Checkout the repository to the GitHub Actions runner
	      - name: Checkout
	        uses: actions/checkout@v2
	        with:
	          submodules: true
	      # Update references
	      - name: Git Submodule Update
	        run: |
	          cd fbgemm_gpu/
	          git submodule sync
	          git submodule update --init --recursive
	      - name: Update pip
	        run: |
	          sudo yum update -y
	          sudo yum -y install git python3-pip
	          sudo pip3 install --upgrade pip
	      - name: Setup conda
	        run: |
	          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
	          bash ~/miniconda.sh -b -p $HOME/miniconda -u
	      - name: Setup PATH with conda
	        run: |
	          echo "/home/ec2-user/miniconda/bin" >> "$GITHUB_PATH"
	          # CONDA is an environment variable, not a PATH entry, so it is
	          # written to GITHUB_ENV rather than GITHUB_PATH.
	          echo "CONDA=/home/ec2-user/miniconda" >> "$GITHUB_ENV"
	      - name: Create conda env
	        run: |
	          conda create --name build_binary python=${{ matrix.python-version }}
	          conda info
	      - name: check python version
	        run: |
	          conda run -n build_binary python --version
	      - name: Install CUDA 11.3
	        shell: bash
	        run: |
	          sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
	          sudo yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
	          sudo yum clean expire-cache
	          sudo yum install -y nvidia-driver-latest-dkms
	          sudo yum install -y cuda-11-3
	          sudo yum install -y cuda-drivers
	          sudo yum install -y libcudnn8-devel
	      - name: setup Path
	        run: |
	          echo /usr/local/cuda-11.3/bin >> "$GITHUB_PATH"
	          echo /usr/local/bin >> "$GITHUB_PATH"
	      - name: nvcc check
	        run: |
	          nvcc --version
	      - name: Install PyTorch
	        shell: bash
	        run: |
	          conda run -n build_binary \
	            python -m pip install --pre torch -f https://download.pytorch.org/whl/test/cu113/torch_test.html

	      - name: Install Dependencies
	        shell: bash
	        run: |
	          cd fbgemm_gpu/
	          conda run -n build_binary python -m pip install -r requirements.txt
	      - name: Test Installation of dependencies
	        run: |
	          cd fbgemm_gpu/
	          conda run -n build_binary python -c "import torch.distributed"
	          echo "torch.distributed succeeded"
	          conda run -n build_binary python -c "import skbuild"
	          echo "skbuild succeeded"
	          conda run -n build_binary python -c "import numpy"
	          echo "numpy succeeded"
	      # for the conda run with quotes, we have to use "\" and double quotes
	      # here is the issue: https://github.com/conda/conda/issues/10972
	      - name: Build FBGEMM_GPU Release
	        run: |
	          cd fbgemm_gpu/
	          # Remove wheels left over from a previous build; a missing
	          # dist/ directory is not an error.
	          rm -r dist || true
	          # build cuda7.0;8.0 for v100/a100 arch:
	          # Couldn't build more cuda arch due to 100 MB binary size limit from
	          # pypi website.
	          # manylinux1_x86_64 is specified for pypi upload:
	          # distribute python extensions as wheels on Linux
	          conda run -n build_binary \
	            python setup.py bdist_wheel \
	            --package_name=fbgemm_gpu \
	            --python-tag=${{ matrix.python-tag }} \
	            -DTORCH_CUDA_ARCH_LIST="'7.0;8.0'" \
	            --plat-name=manylinux1_x86_64
	          ls -lt dist/*.whl
	      # The artifact name must match the one the test_on_gpu job downloads.
	      - name: Upload wheel as GHA artifact
	        uses: actions/upload-artifact@v2
	        with:
	          name: fbgemm_gpu_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
	          path: fbgemm_gpu/dist/fbgemm_gpu-*.whl

	# download from GHA, test on gpu and push to pypi
	  test_on_gpu:
	    runs-on: ${{ matrix.os }}
	    strategy:
	      matrix:
	        os: [linux.4xlarge.nvidia.gpu]
	        # Quoted so the versions stay strings; an unquoted 3.10 would be
	        # parsed as the float 3.1.
	        python-version: ["3.7", "3.8", "3.9"]
	        cuda-tag: ["cu11"]
	    needs: build_on_cpu
	    steps:
	      - name: Check ldd --version
	        run: ldd --version
	      - name: check cpu info
	        shell: bash
	        run: |
	          cat /proc/cpuinfo
	      - name: check distribution info
	        shell: bash
	        run: |
	          cat /proc/version
	      - name: Display EC2 information
	        shell: bash
	        run: |
	          set -euo pipefail
	          function get_ec2_metadata() {
	            # Pulled from instance metadata endpoint for EC2
	            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
	            category=$1
	            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
	          }
	          echo "ami-id: $(get_ec2_metadata ami-id)"
	          echo "instance-id: $(get_ec2_metadata instance-id)"
	          echo "instance-type: $(get_ec2_metadata instance-type)"
	      - name: check gpu info
	        shell: bash
	        run: |
	          sudo yum install lshw -y
	          sudo lshw -C display
	      # Checkout the repository to the GitHub Actions runner
	      - name: Checkout
	        uses: actions/checkout@v2
	        with:
	          submodules: true
	      # Update references
	      - name: Git Submodule Update
	        run: |
	          cd fbgemm_gpu/
	          git submodule sync
	          git submodule update --init --recursive
	          git log
	      - name: Update pip
	        run: |
	          sudo yum update -y
	          sudo yum -y install git python3-pip
	          sudo pip3 install --upgrade pip
	      - name: Setup conda
	        run: |
	          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
	          bash ~/miniconda.sh -b -p $HOME/miniconda -u
	      # Named differently from the CUDA "setup Path" step further down so
	      # the two steps can be told apart in the run log.
	      - name: Setup PATH with conda
	        run: |
	          echo "/home/ec2-user/miniconda/bin" >> "$GITHUB_PATH"
	          # CONDA is an environment variable, not a PATH entry, so it is
	          # written to GITHUB_ENV rather than GITHUB_PATH.
	          echo "CONDA=/home/ec2-user/miniconda" >> "$GITHUB_ENV"
	      - name: create conda env
	        run: |
	          conda create --name build_binary python=${{ matrix.python-version }}
	          conda info
	      - name: check python version no Conda
	        run: |
	          python --version
	      - name: check python version
	        run: |
	          conda run -n build_binary python --version
	      - name: Install CUDA 11.3
	        shell: bash
	        run: |
	          sudo yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
	          sudo yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo
	          sudo yum clean expire-cache
	          sudo yum install -y nvidia-driver-latest-dkms
	          sudo yum install -y cuda-11-3
	          sudo yum install -y cuda-drivers
	          sudo yum install -y libcudnn8-devel
	      - name: setup Path
	        run: |
	          echo /usr/local/cuda-11.3/bin >> "$GITHUB_PATH"
	          echo /usr/local/bin >> "$GITHUB_PATH"
	      - name: nvcc check
	        run: |
	          nvcc --version
	      - name: Install PyTorch
	        shell: bash
	        run: |
	          conda run -n build_binary \
	            python -m pip install --pre torch -f https://download.pytorch.org/whl/test/cu113/torch_test.html
	      # download wheel from GHA
	      # The artifact name must match the one the build_on_cpu job uploads.
	      - name: Download wheel
	        uses: actions/download-artifact@v2
	        with:
	          name: fbgemm_gpu_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
	      - name: Display structure of downloaded files
	        run: ls -R
	      - name: Install Dependencies
	        shell: bash
	        run: |
	          cd fbgemm_gpu/
	          conda run -n build_binary python -m pip install -r requirements.txt
	      - name: Test Installation of dependencies
	        run: |
	          cd fbgemm_gpu/
	          conda run -n build_binary python -c "import torch.distributed"
	          echo "torch.distributed succeeded"
	          conda run -n build_binary python -c "import skbuild"
	          echo "skbuild succeeded"
	          conda run -n build_binary python -c "import numpy"
	          echo "numpy succeeded"
	      - name: Install FBGEMM_GPU Release
	        run: |
	          rm -r dist || true
	          conda run -n build_binary \
	            python -m pip install *.whl
	      - name: Test fbgemm_gpu installation
	        shell: bash
	        run: |
	          conda run -n build_binary \
	            python -c "import fbgemm_gpu"
	      - name: Test with pytest
	        # remove this line when we fixed all the unit tests
	        # NOTE: while it is set, a failing or timed-out test run does not
	        # stop the job, so the upload step below still executes.
	        continue-on-error: true
	        run: |
	          conda run -n build_binary \
	            python -m pip install pytest
	          # The tests with single CPU core on a less powerful testing GPU in GHA
	          # can take 5 hours.
	          timeout 600s conda run -n build_binary \
	            python -m pytest -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors
	      # Push to Pypi
	      - name: Push FBGEMM_GPU Binary to PYPI
	        env:
	          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
	        run: |
	          conda run -n build_binary python -m pip install twine
	          # Official PYPI website
	          conda run -n build_binary \
	            python -m twine upload \
	            --username __token__ \
	            --password "$PYPI_TOKEN" \
	            --skip-existing \
	            --verbose \
	            fbgemm_gpu-*.whl

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

port perf-bwd-hip branch to rocm6.2 #236

Workflow file

port perf-bwd-hip branch to rocm6.2 #236

Jobs

Run details

Workflow file for this run