diff --git a/.github/workflows/pr-gpu.yaml b/.github/workflows/pr-gpu.yaml
index 14ee0cc9d4..f6de8908c1 100644
--- a/.github/workflows/pr-gpu.yaml
+++ b/.github/workflows/pr-gpu.yaml
@@ -1,105 +1,88 @@
 name: PR GPU tests
 on:
-  push:
-    branches:
-    - main
-    - release/*
-  pull_request:
-    branches:
-    - main
-    - release/**
+  pull_request_target:
   workflow_dispatch:
-permissions:
-  contents: read
-# Cancel old runs when a new commit is pushed to the same branch if not on main or dev
+# Cancel old runs when a new commit is pushed to the same branch if not on main
+# or dev
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/dev' }}
 jobs:
   pytest-gpu-1:
-    name: ${{ matrix.name }}
-    if: github.repository_owner == 'mosaicml'
-    runs-on: linux-ubuntu-latest
+    uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.9
     strategy:
-      fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.3.1-1"
-          container: mosaicml/llm-foundry:2.3.1_cu121-latest
-          markers: "gpu"
-          pip_deps: "[all]"
-          pytest_command: "coverage run -m pytest"
-          ci_repo_gpu_test_ref: v0.1.0
-    steps:
-    - name: Run PR GPU Tests
-      uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.0
-      with:
-        container: ${{ matrix.container }}
-        git_repo: mosaicml/llm-foundry
-        mcloud_timeout: 1800
-        name: ${{ matrix.name }}
-        pip_deps: ${{ matrix.pip_deps }}
-        pytest_command: ${{ matrix.pytest_command }}
-        pytest_markers: ${{ matrix.markers }}
-        python_version: 3.9
-        gpu_num: 1
-        mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }}
-        ci_repo_gpu_test_ref: ${{ matrix.ci_repo_gpu_test_ref }}
-  pytest-gpu-2:
+        - name: gpu-3.11-2.3-1
+          container: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
+          markers: not daily and not remote and gpu and (doctest or not doctest)
+          pytest_command: coverage run -m pytest
+          composer_package_name: mosaicml
     name: ${{ matrix.name }}
     if: github.repository_owner == 'mosaicml'
-    runs-on: linux-ubuntu-latest
+    with:
+      composer_package_name: ${{ matrix.composer_package_name }}
+      container: ${{ matrix.container }}
+      git_repo: mosaicml/composer
+      mcloud-timeout: 2250
+      name: ${{ matrix.name }}
+      pip_deps: "[all]"
+      pytest-command: ${{ matrix.pytest_command }}
+      pytest-markers: ${{ matrix.markers }}
+      python-version: 3.9
+      gpu_num: 1
+    secrets:
+      mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}
+
+  pytest-gpu-2:
+    uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.9
     strategy:
-      fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.3.1-2"
-          container: mosaicml/llm-foundry:2.3.1_cu121-latest
-          markers: "gpu"
-          pip_deps: "[all]"
-          pytest_command: "coverage run -m pytest"
-          ci_repo_gpu_test_ref: v0.1.0
-    steps:
-    - name: Run PR GPU Tests
-      uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.0
-      with:
-        container: ${{ matrix.container }}
-        git_repo: mosaicml/llm-foundry
-        mcloud_timeout: 1800
-        name: ${{ matrix.name }}
-        pip_deps: ${{ matrix.pip_deps }}
-        pytest_command: ${{ matrix.pytest_command }}
-        pytest_markers: ${{ matrix.markers }}
-        python_version: 3.9
-        gpu_num: 2
-        mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }}
-        ci_repo_gpu_test_ref: ${{ matrix.ci_repo_gpu_test_ref }}
-  pytest-gpu-4:
+        - name: gpu-3.11-2.3-2
+          container: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
+          markers: not daily and not remote and gpu and (doctest or not doctest)
+          pytest_command: coverage run -m pytest
+          composer_package_name: mosaicml
     name: ${{ matrix.name }}
     if: github.repository_owner == 'mosaicml'
-    runs-on: linux-ubuntu-latest
+    with:
+      composer_package_name: ${{ matrix.composer_package_name }}
+      container: ${{ matrix.container }}
+      git_repo: mosaicml/composer
+      mcloud-timeout: 2250
+      name: ${{ matrix.name }}
+      pip_deps: "[all]"
+      pytest-command: ${{ matrix.pytest_command }}
+      pytest-markers: ${{ matrix.markers }}
+      python-version: 3.9
+      gpu_num: 2
+    secrets:
+      mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}
+
+
+  pytest-gpu-4:
+    uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.9
     strategy:
-      fail-fast: false
       matrix:
         include:
-        - name: "gpu-2.3.1-4"
-          container: mosaicml/llm-foundry:2.3.1_cu121-latest
-          markers: "gpu"
-          pip_deps: "[all]"
-          pytest_command: "coverage run -m pytest"
-          ci_repo_gpu_test_ref: v0.1.0
-    steps:
-    - name: Run PR GPU Tests
-      uses: mosaicml/ci-testing/.github/actions/pytest-gpu@v0.1.0
-      with:
-        container: ${{ matrix.container }}
-        git_repo: mosaicml/llm-foundry
-        mcloud_timeout: 1800
-        name: ${{ matrix.name }}
-        pip_deps: ${{ matrix.pip_deps }}
-        pytest_command: ${{ matrix.pytest_command }}
-        pytest_markers: ${{ matrix.markers }}
-        python_version: 3.9
-        gpu_num: 4
-        mcloud_api_key: ${{ secrets.MCLOUD_API_KEY }}
-        ci_repo_gpu_test_ref: ${{ matrix.ci_repo_gpu_test_ref }}
+        - name: gpu-3.11-2.3-4
+          container: mosaicml/pytorch:2.3.1_cu121-python3.11-ubuntu20.04
+          markers: not daily and not remote and gpu and (doctest or not doctest)
+          pytest_command: coverage run -m pytest
+          composer_package_name: mosaicml
+    name: ${{ matrix.name }}
+    if: github.repository_owner == 'mosaicml'
+    with:
+      composer_package_name: ${{ matrix.composer_package_name }}
+      container: ${{ matrix.container }}
+      git_repo: mosaicml/composer
+      mcloud-timeout: 2250
+      name: ${{ matrix.name }}
+      pip_deps: "[all]"
+      pytest-command: ${{ matrix.pytest_command }}
+      pytest-markers: ${{ matrix.markers }}
+      python-version: 3.9
+      gpu_num: 4
+    secrets:
+      mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}