# [docker] experiment with nvcr pytorch image for torch 2.5.1 #4945
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name.
name: Tests

on:
  # check on push/merge to main, PRs, and manual triggers
  push:
    branches:
      - "main"
    # Path filters: only changes touching Python sources, requirement files,
    # workflow definitions, or the cicd entry points listed here trigger a run.
    paths:
      - '**.py'
      - 'requirements.txt'
      - '.github/workflows/*.yml'
      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  pull_request:
    # Same path filters as the push trigger above.
    paths:
      - '**.py'
      - 'requirements.txt'
      - '.github/workflows/*.yml'
      - 'requirements-tests.txt'
      - 'cicd/cicd.sh'
      - 'cicd/Dockerfile.jinja'
  # Manual trigger; no inputs are declared.
  workflow_dispatch:
# Cancel jobs on the same ref if a new one is triggered
concurrency:
  # One concurrency group per (workflow, ref) pair.
  group: ${{ github.workflow }}-${{ github.ref }}
  # The expression is false for refs/heads/main, so runs on main are never
  # cancelled; runs on every other ref are superseded by newer ones.
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
jobs:
  # Lint gate: runs the repository's pre-commit hooks on a hosted runner.
  pre-commit:
    name: pre-commit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: 'pip'  # caching pip dependencies
      # NOTE(review): this reference was garbled by e-mail obfuscation in the
      # captured text ("pre-commit/[email protected]"); v3.0.1 is the
      # reconstructed tag -- confirm against the repository history.
      - uses: pre-commit/action@v3.0.1
        env:
          # pre-commit skips the hook ids listed in SKIP.
          SKIP: no-commit-to-branch

  # Unit tests against an editable install across the Python/PyTorch matrix.
  pytest:
    name: PyTest
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      max-parallel: 2
      matrix:
        python_version: ["3.10", "3.11"]
        pytorch_version: ["2.3.1", "2.4.1", "2.5.1"]
        # Python 3.10 is only combined with torch 2.3.1; the two newer torch
        # versions run on Python 3.11 only.
        exclude:
          - python_version: "3.10"
            pytorch_version: "2.4.1"
          - python_version: "3.10"
            pytorch_version: "2.5.1"
    timeout-minutes: 20

    steps:
      - name: Check out repository code
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
          cache: 'pip'  # caching pip dependencies

      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging setuptools wheel

      # torch is pinned to the matrix version before the project is installed.
      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }}

      # Editable install of the project, then the two helper scripts whose
      # printed output is piped to sh, then the dev/test requirements.
      - name: Install dependencies
        run: |
          pip3 show torch
          pip3 install -U -e .
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Ensure axolotl CLI was installed
        run: |
          axolotl --help

      # tests/e2e/ is never run here; tests/patched/ is excluded from the
      # parallel (-n8) invocation and run afterwards in its own pytest process.
      - name: Run tests
        run: |
          pytest -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
          pytest tests/patched/

      # Removes files last modified more than 14 days ago from pip's http-v2
      # cache directory.
      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

  # Same test run as the pytest job, but the project is installed from a
  # freshly built source distribution instead of an editable checkout.
  pytest-sdist:
    name: PyTest from Source Dist
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      max-parallel: 1
      matrix:
        python_version: ["3.11"]
        pytorch_version: ["2.4.1", "2.5.1"]
    timeout-minutes: 20

    steps:
      - name: Check out repository code
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
          cache: 'pip'  # caching pip dependencies

      - name: upgrade pip
        run: |
          pip3 install --upgrade pip
          pip3 install --upgrade packaging setuptools wheel

      - name: Install PyTorch
        run: |
          pip3 install torch==${{ matrix.pytorch_version }}

      # Builds the sdist into dist/ and installs the resulting tarball.
      - name: Install dependencies
        run: |
          pip3 show torch
          python3 setup.py sdist
          pip3 install dist/axolotl*.tar.gz
          python scripts/unsloth_install.py | sh
          python scripts/cutcrossentropy_install.py | sh
          pip3 install -r requirements-dev.txt -r requirements-tests.txt

      - name: Ensure axolotl CLI was installed
        run: |
          axolotl --help

      - name: Run tests
        run: |
          pytest -n8 --dist loadfile --ignore=tests/e2e/ --ignore=tests/patched/ tests/
          pytest tests/patched/

      - name: cleanup pip cache
        run: |
          find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;

  # Disabled job, kept for reference. Remove the leading "# " from each line
  # to re-enable it.
  # docker-e2e-tests-1st:
  #   if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
  #   # this job needs to be run on self-hosted GPU runners...
  #   runs-on: [self-hosted, modal]
  #   timeout-minutes: 90
  #   needs: [pre-commit, pytest, pytest-sdist]
  #
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       include:
  #         - cuda: 124
  #           cuda_version: 12.4.1
  #           python_version: "3.11"
  #           pytorch: 2.4.1
  #           num_gpus: 1
  #           axolotl_extras:
  #   steps:
  #     - name: Checkout
  #       uses: actions/checkout@v4
  #     - name: Install Python
  #       uses: actions/setup-python@v5
  #       with:
  #         python-version: "3.10"
  #     - name: Install Modal
  #       run: |
  #         python -m pip install --upgrade pip
  #         pip install modal==0.63.64 jinja2
  #     - name: Update env vars
  #       run: |
  #         echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
  #         echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
  #         echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
  #         echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
  #         echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
  #         echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
  #     - name: Run tests job on Modal
  #       run: |
  #         modal run cicd.tests

  # End-to-end tests dispatched through Modal. Only runs when the repository
  # owner is axolotl-ai-cloud, and only after pre-commit and pytest succeed.
  docker-e2e-tests:
    if: github.repository_owner == 'axolotl-ai-cloud'
    # this job needs to be run on self-hosted GPU runners...
    runs-on: [self-hosted, modal]
    timeout-minutes: 90
    # needs: [pre-commit, pytest, docker-e2e-tests-1st]
    needs: [pre-commit, pytest]

    strategy:
      fail-fast: false
      matrix:
        include:
          # - cuda: 121
          #   cuda_version: 12.1.1
          #   python_version: "3.10"
          #   pytorch: 2.3.1
          #   num_gpus: 1
          #   axolotl_extras: mamba-ssm
          - cuda: 124
            cuda_version: "12.4.1"
            python_version: "3.11"
            pytorch: "2.5.1"
            num_gpus: 1
            # Left empty: no extras are requested for this combination.
            axolotl_extras:
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # This interpreter is the one used on the runner to install and invoke
      # Modal in the steps below; matrix.python_version is only used to build
      # BASE_TAG.
      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install Modal
        run: |
          python -m pip install --upgrade pip
          pip install modal==0.63.64 jinja2

      # Exports the matrix values to later steps through $GITHUB_ENV.
      # NOTE(review): BASE_TAG is hard-coded to the "pr-2139" prefix (the
      # disabled job above used "main") -- confirm before merging.
      # NOTE(review): matrix.axolotl_args is not defined in the matrix above,
      # so AXOLOTL_ARGS is presumably exported empty -- confirm this is intended.
      - name: Update env vars
        run: |
          echo "BASE_TAG=pr-2139-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
          echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
          echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
          echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
          echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
          echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV

      - name: Run tests job on Modal
        run: |
          modal run cicd.tests