diff --git a/.github/workflows/inductor-cu124-unittest.yml b/.github/workflows/inductor-cu124-unittest.yml new file mode 100644 index 00000000000000..85a367b7dd8329 --- /dev/null +++ b/.github/workflows/inductor-cu124-unittest.yml @@ -0,0 +1,82 @@ +# Workflow: Inductor Cu124 Unit Test +# 1. runs unit tests for inductor-cu124. +# 2. perfoms daily memory leak checks and reruns of disabled tests, scheduled at `29 8 * * *`. +name: inductor-cu124-unittest + +on: + workflow_call: + workflow_dispatch: + schedule: + - cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-unittest + cancel-in-progress: true + +permissions: read-all + +jobs: + get-default-label-prefix: + name: get-default-label-prefix + uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main + if: ${{ (github.event_name != 'schedule' || github.repository == 'pytorch/pytorch') && github.repository_owner == 'pytorch' }} + with: + triggering_actor: ${{ github.triggering_actor }} + issue_owner: ${{ github.event.pull_request.user.login || github.event.issue.user.login }} + curr_branch: ${{ github.head_ref || github.ref_name }} + curr_ref_type: ${{ github.ref_type }} + + linux-focal-cuda12_4-py3_10-gcc9-inductor-build: + name: cuda12.4-py3.10-gcc9-sm86 + uses: ./.github/workflows/_linux-build.yml + needs: get-default-label-prefix + with: + runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" + sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-build + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks + cuda-arch-list: '8.6' + test-matrix: | + { include: [ + { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, + { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, + { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" }, + { config: "inductor_cpp_wrapper", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, + ]} + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + + linux-focal-cuda12_4-py3_10-gcc9-inductor-test: + name: cuda12.4-py3.10-gcc9-sm86 + uses: ./.github/workflows/_linux-test.yml + needs: linux-focal-cuda12_4-py3_10-gcc9-inductor-build + with: + sync-tag: linux-focal-cuda12_4-py3_10-gcc9-inductor-test + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm86 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-inductor-build.outputs.test-matrix }} + secrets: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} + + linux-focal-cuda12_4-py3_12-gcc9-inductor-build: + if: github.repository_owner == 'pytorch' + name: cuda12.4-py3.12-gcc9-sm86 + uses: ./.github/workflows/_linux-build.yml + with: + build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks + cuda-arch-list: '8.6' + test-matrix: | + { include: [ + { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, + { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, + ]} + + linux-focal-cuda12_4-py3_12-gcc9-inductor-test: + name: cuda12.4-py3.12-gcc9-sm86 + uses: ./.github/workflows/_linux-test.yml + needs: linux-focal-cuda12_4-py3_12-gcc9-inductor-build + with: + build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.test-matrix }} diff --git a/.github/workflows/inductor-cu124.yml b/.github/workflows/inductor-cu124.yml index 34e8c810a12ccc..63a9bb3edd28e7 100644 --- a/.github/workflows/inductor-cu124.yml +++ b/.github/workflows/inductor-cu124.yml @@ -1,3 +1,5 @@ +# Workflow: Inductor +# runs all inductor cu124 tests, including both benchmark tests and unit tests. name: inductor-cu124 on: @@ -9,7 +11,6 @@ on: # Run every 4 hours during the week and every 12 hours on the weekend - cron: 45 0,4,8,12,16,20 * * 1-5 - cron: 45 4,12 * * 0,6 - - cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }} @@ -18,6 +19,11 @@ concurrency: permissions: read-all jobs: + unit-test: + if: github.repository_owner == 'pytorch' + name: inductor-unittest + uses: ./.github/workflows/inductor-cu124-unittest.yml + get-default-label-prefix: name: get-default-label-prefix uses: pytorch/pytorch/.github/workflows/_runner-determinator.yml@main @@ -40,7 +46,7 @@ jobs: check_experiments: "awsa100" linux-focal-cuda12_4-py3_10-gcc9-inductor-build: - # Should be synced with the one in inductor.yml, but this doesn't run inductor_timm + # Should be synced with the benchmark tests in inductor.yml, but this doesn't run inductor_timm name: cuda12.4-py3.10-gcc9-sm86 uses: ./.github/workflows/_linux-build.yml needs: get-default-label-prefix @@ -52,9 +58,6 @@ jobs: cuda-arch-list: '8.6' test-matrix: | { include: [ - { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "inductor_distributed", shard: 1, num_shards: 1, runner: "linux.g5.12xlarge.nvidia.gpu" }, { config: "inductor_huggingface", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, @@ -68,7 +71,6 @@ jobs: { config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, { config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "inductor_cpp_wrapper", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" }, ]} secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} @@ -112,26 +114,3 @@ jobs: use-gha: anything-non-empty-to-use-gha secrets: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} - - linux-focal-cuda12_4-py3_12-gcc9-inductor-build: - if: github.repository_owner == 'pytorch' - name: cuda12.4-py3.12-gcc9-sm86 - uses: ./.github/workflows/_linux-build.yml - with: - build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86 - docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks - cuda-arch-list: '8.6' - test-matrix: | - { include: [ - { config: "inductor", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "inductor", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" }, - ]} - - linux-focal-cuda12_4-py3_12-gcc9-inductor-test: - name: cuda12.4-py3.12-gcc9-sm86 - uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_4-py3_12-gcc9-inductor-build - with: - build-environment: linux-focal-cuda12.4-py3.12-gcc9-sm86 - docker-image: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_4-py3_12-gcc9-inductor-build.outputs.test-matrix }}