Skip to content

Implement Router Z-loss (#151) #101

Implement Router Z-loss (#151)

Implement Router Z-loss (#151) #101

Workflow file for this run

# Display name of this workflow.
name: PR GPU tests

# Events that start the workflow:
#   * push                - commits landing on main or a release branch
#   * pull_request_target - pull requests whose base is main or a release branch
#   * workflow_dispatch   - manual runs
#
# NOTE(review): the push filter uses `release/*` while the pull-request filter
# uses `release/**`. In GitHub's filter patterns `*` does not match `/` but
# `**` does, so the two filters differ for nested branch names such as
# `release/a/b` - confirm the difference is intentional.
#
# NOTE(review): `pull_request_target` runs in the context of the base
# repository, and this workflow passes a secret to the test action - confirm
# that running pull-request code with that secret available is acceptable.
on:
  push:
    branches: [main, "release/*"]
  pull_request_target:
    branches: [main, "release/**"]
  workflow_dispatch:
# Runs are grouped by workflow name plus the pull-request number, falling back
# to the git ref when the event carries no pull request.
concurrency:
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
  # An in-progress run in the same group is cancelled unless the ref is main.
  cancel-in-progress: "${{ github.ref != 'refs/heads/main' }}"
jobs:
  # Runs the pytest suite filtered by the `gpu` marker, once per matrix entry,
  # by delegating to the shared mosaicml/ci-testing action.
  pytest-gpu:
    name: ${{ matrix.name }}
    # Only run when the repository owner is `databricks`.
    if: github.repository_owner == 'databricks'
    runs-on: ubuntu-latest  # TODO: switch to linux-ubuntu-latest later
    strategy:
      # A failing matrix entry does not cancel the remaining entries.
      fail-fast: false
      matrix:
        # The two entries differ only in `name` and `gpu_num`.
        include:
          - name: "python3.11-pytorch2.4.0-gpus1"
            gpu_num: 1
            # Quoted so the version stays a string instead of a YAML float
            # (an unquoted 3.10 would be read as 3.1).
            python_version: "3.11"
            container: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
          - name: "python3.11-pytorch2.4.0-gpus2"
            gpu_num: 2
            python_version: "3.11"
            container: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04
    steps:
      - name: Run PR GPU tests
        # NOTE(review): in the reviewed copy this reference read
        # "[email protected]" (email-obfuscation residue replacing the
        # `<action>@<tag>` token). The action name and tag below are a
        # reconstruction - confirm the pinned tag against the
        # mosaicml/ci-testing repository before merging.
        uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.2
        with:
          # Per-entry values forwarded from the matrix above.
          name: ${{ matrix.name }}
          container: ${{ matrix.container }}
          python_version: ${{ matrix.python_version }}
          gpu_num: ${{ matrix.gpu_num }}
          git_repo: databricks/megablocks
          pip_deps: "[all,testing]"
          # TODO: remove tests from pytest tests when we delete all tests in the MegaBlocks dir
          pytest_command: "coverage run -m pytest tests"
          pytest_markers: "gpu"
          composer_package_name: mosaicml  # Required as Composer is built from source
          mcloud_timeout: 3600
          # Read from repository secrets; never hard-code this value.
          mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }}