From d38ed44e9dc3a7b8089a9ad57452c2035a7e3ac7 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:22:04 +0100 Subject: [PATCH 01/10] Use AMD CI workflow defined in hf-workflows --- .../workflows/self-push-amd-mi210-caller.yml | 50 +-- .../workflows/self-push-amd-mi250-caller.yml | 50 +-- .../workflows/self-push-amd-mi300-caller.yml | 2 +- .github/workflows/self-push-amd.yml | 335 ------------------ 4 files changed, 51 insertions(+), 386 deletions(-) delete mode 100644 .github/workflows/self-push-amd.yml diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index a401e40ee7f164..08b73610563089 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi210 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi210 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi210 - secrets: inherit +name: Self-hosted runner (AMD mi210 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi210 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + with: + gpu_flavor: mi210 + secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index fef532703170cb..b83928052cfc9e 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi250 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi250 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi250 - secrets: inherit +name: Self-hosted runner (AMD mi250 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi250 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + 
with: + gpu_flavor: mi250 + secrets: inherit diff --git a/.github/workflows/self-push-amd-mi300-caller.yml b/.github/workflows/self-push-amd-mi300-caller.yml index a8ee4e540ecf3f..cb1a315be7e819 100644 --- a/.github/workflows/self-push-amd-mi300-caller.yml +++ b/.github/workflows/self-push-amd-mi300-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi300 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: ./.github/workflows/self-push-amd.yml + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main with: gpu_flavor: mi300 secrets: inherit diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml deleted file mode 100644 index 6931c2f3eadcad..00000000000000 --- a/.github/workflows/self-push-amd.yml +++ /dev/null @@ -1,335 +0,0 @@ -name: Self-hosted runner AMD GPU (push) - -on: - workflow_call: - inputs: - gpu_flavor: - required: true - type: string - -env: - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - PYTEST_TIMEOUT: 60 - TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 - HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - -jobs: - check_runner_status: - name: Check Runner Status - runs-on: ubuntu-22.04 - steps: - - name: Checkout transformers - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - - check_runners: - name: Check Runners - needs: check_runner_status - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - setup_gpu: - name: Setup - needs: check_runners - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - test_map: ${{ steps.set-matrix.outputs.test_map }} - env: - # `CI_BRANCH_PUSH`: The branch name from the push event - # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event - # `CI_SHA_PUSH`: The commit SHA from the push event - # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event - CI_BRANCH_PUSH: ${{ github.event.ref }} - CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH: ${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} - steps: - # Necessary to get the correct branch name and commit SHA for 
`workflow_run` event - # We also take into account the `push` event (we might want to test some changes in a branch) - - name: Prepare custom environment variables - shell: bash - # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) - # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) - run: | - CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - echo $CI_BRANCH_PUSH - echo $CI_BRANCH_WORKFLOW_RUN - echo $CI_SHA_PUSH - echo $CI_SHA_WORKFLOW_RUN - [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV - [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV - - - name: print environment variables - run: | - echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" - echo "env.CI_SHA = ${{ env.CI_SHA }}" - - - name: Update clone using environment variables - working-directory: /transformers - run: | - echo "original branch = $(git branch --show-current)" - git fetch && git checkout ${{ env.CI_BRANCH }} - echo "updated branch = $(git branch --show-current)" - git checkout ${{ env.CI_SHA }} - echo "log = $(git log -n 1)" - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Fetch the tests to run - working-directory: /transformers - # TODO: add `git-python` in the docker images - run: | - pip install --upgrade git-python - python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt - - - name: Report fetched tests - uses: actions/upload-artifact@v4 - with: - name: test_fetched - path: /transformers/test_preparation.txt - - - id: set-matrix - name: Organize tests into models - working-directory: /transformers - # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc. - # The `test_map` is used to get the actual identified test files under each key. 
- # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail) - run: | - if [ -f test_map.json ]; then - keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)') - test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)') - else - keys=$(python3 -c 'keys = ["dummy"]; print(keys)') - test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)') - fi - echo $keys - echo $test_map - echo "matrix=$keys" >> $GITHUB_OUTPUT - echo "test_map=$test_map" >> $GITHUB_OUTPUT - - run_models_gpu: - name: Model tests - needs: setup_gpu - # `dummy` means there is no test to run - if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - env: - # For the meaning of these environment variables, see the job `Setup` - CI_BRANCH_PUSH: ${{ github.event.ref }} - CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH: ${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} - steps: - # Necessary to get the correct branch name and commit SHA for `workflow_run` event - # We also take into account the `push` event (we might want to test some changes in a branch) - - name: Prepare custom environment variables - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - echo $CI_BRANCH_PUSH - echo $CI_BRANCH_WORKFLOW_RUN - echo $CI_SHA_PUSH - echo $CI_SHA_WORKFLOW_RUN - [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV - [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV - - - name: print environment variables - run: | - echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" - echo "env.CI_SHA = ${{ env.CI_SHA }}" - - - name: Update clone using environment variables - working-directory: /transformers - run: | - echo "original branch = $(git branch --show-current)" - git fetch && git checkout ${{ env.CI_BRANCH }} - echo "updated branch = $(git branch --show-current)" - git checkout ${{ env.CI_SHA }} - echo "log = $(git log -n 1)" - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). 
- run: | - echo "${{ matrix.folders }}" - echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all non-slow selected tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports - - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() - needs: [ - check_runner_status, - check_runners, - setup_gpu, - run_models_gpu, -# run_tests_torch_cuda_extensions_single_gpu, -# run_tests_torch_cuda_extensions_multi_gpu - ] - env: - # For the meaning of these environment variables, see the job `Setup` - CI_BRANCH_PUSH: ${{ github.event.ref }} - CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH: ${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Runner availability: ${{ needs.check_runner_status.result }}" - echo "Setup status: ${{ needs.setup_gpu.result }}" - echo "Runner status: ${{ needs.check_runners.result }}" - - # Necessary to get the correct branch name and commit SHA for `workflow_run` event - # We also take into account the `push` event (we might want to test some changes in a branch) - - name: Prepare custom environment variables - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - echo $CI_BRANCH_PUSH - echo $CI_BRANCH_WORKFLOW_RUN - echo $CI_SHA_PUSH - echo $CI_SHA_WORKFLOW_RUN - [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV - [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV - - - name: print environment variables - run: | - echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" - echo "env.CI_SHA = ${{ env.CI_SHA }}" - - - uses: actions/checkout@v4 - # To avoid failure when multiple commits are merged into `main` in a short period of time. 
- # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ... - # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit) - with: - fetch-depth: 20 - - - name: Update clone using environment variables - run: | - echo "original branch = $(git branch --show-current)" - git fetch && git checkout ${{ env.CI_BRANCH }} - echo "updated branch = $(git branch --show-current)" - git checkout ${{ env.CI_SHA }} - echo "log = $(git log -n 1)" - - - uses: actions/download-artifact@v4 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} - CI_TITLE_PUSH: ${{ github.event.head_commit.message }} - CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} - CI_SHA: ${{ env.CI_SHA }} - RUNNER_STATUS: ${{ needs.check_runner_status.result }} - RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} - SETUP_STATUS: ${{ needs.setup_gpu.result }} - - # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - pip install huggingface_hub - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" From 32b371863f3801c7517c5176dd6fd6ee10c195f7 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:35:23 +0100 Subject: [PATCH 02/10] Use hf-workflows for both push and scheduled AMD CI --- .../workflows/self-push-amd-mi210-caller.yml | 2 +- .../workflows/self-push-amd-mi250-caller.yml | 2 +- .../workflows/self-push-amd-mi300-caller.yml | 2 +- .../self-scheduled-amd-mi210-caller.yml | 110 +++--- .../self-scheduled-amd-mi250-caller.yml | 110 +++--- .github/workflows/self-scheduled-amd.yml | 349 ------------------ 6 files changed, 113 insertions(+), 462 deletions(-) delete mode 100644 .github/workflows/self-scheduled-amd.yml diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 08b73610563089..5612304389581d 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi210 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main with: gpu_flavor: mi210 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index b83928052cfc9e..ea2801f2c4a4e7 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi250 if: (cancelled() != true) 
&& ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main with: gpu_flavor: mi250 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi300-caller.yml b/.github/workflows/self-push-amd-mi300-caller.yml index cb1a315be7e819..08d9155419465a 100644 --- a/.github/workflows/self-push-amd-mi300-caller.yml +++ b/.github/workflows/self-push-amd-mi300-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi300 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main with: gpu_flavor: mi300 secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi210-caller.yml b/.github/workflows/self-scheduled-amd-mi210-caller.yml index 1c79b38a314e0b..6109faca00932e 100644 --- a/.github/workflows/self-scheduled-amd-mi210-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml @@ -1,55 +1,55 @@ -name: Self-hosted runner (AMD mi210 scheduled CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (AMD scheduled CI caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_scheduled_ci_caller* - -jobs: - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit - - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit - - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit - - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-deepspeed-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit +name: Self-hosted runner (AMD mi210 scheduled CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (AMD scheduled CI caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_scheduled_ci_caller* + +jobs: + model-ci: + name: Model CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: 
huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + example-ci: + name: Example CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml index fd151305716396..a33b6e579c0ef3 100644 --- a/.github/workflows/self-scheduled-amd-mi250-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml @@ -1,55 +1,55 @@ -name: Self-hosted runner (AMD mi250 scheduled CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (AMD scheduled CI caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_scheduled_ci_caller* - -jobs: - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit - - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit - - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit - - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-deepspeed-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit +name: Self-hosted runner (AMD mi250 scheduled CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (AMD scheduled CI caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_scheduled_ci_caller* + +jobs: + model-ci: + name: Model CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_pipelines_torch_gpu + slack_report_channel: 
"#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + example-ci: + name: Example CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml deleted file mode 100644 index 47f92cd6a2b086..00000000000000 --- a/.github/workflows/self-scheduled-amd.yml +++ /dev/null @@ -1,349 +0,0 @@ -name: Self-hosted runner (scheduled-amd) - -# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the -# CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes -# us towards the limit of allowed jobs on GitHub Actions. - -on: - workflow_call: - inputs: - job: - required: true - type: string - slack_report_channel: - required: true - type: string - runner: - required: true - type: string - docker: - required: true - type: string - ci_event: - required: true - type: string - -env: - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - RUN_SLOW: yes - HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - NUM_SLICES: 2 - -# Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running. -# This is done so that we avoid parallelizing the scheduled tests, to leave available -# runners for the push CI that is running on the same machine. 
-jobs: - check_runner_status: - name: Check Runner Status - runs-on: ubuntu-22.04 - steps: - - name: Checkout transformers - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - - check_runners: - name: Check Runners - needs: check_runner_status - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - setup: - if: contains(fromJSON('["run_models_gpu"]'), inputs.job) - name: Setup - needs: check_runners - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} - slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} - steps: - - name: Update clone - working-directory: /transformers - run: | - git fetch && git checkout ${{ github.sha }} - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - id: set-matrix - name: Identify models to test - working-directory: /transformers/tests - run: | - echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT - echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - run_models_gpu: - if: ${{ inputs.job == 'run_models_gpu' }} - name: Single GPU tests - needs: setup - strategy: - max-parallel: 1 # For now, not to parallelize. Can change later if it works well. 
- fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} - uses: ./.github/workflows/model_jobs_amd.yml - with: - folder_slices: ${{ needs.setup.outputs.folder_slices }} - machine_type: ${{ matrix.machine_type }} - slice_id: ${{ matrix.slice_id }} - runner: ${{ inputs.runner }} - docker: ${{ inputs.docker }} - secrets: inherit - - run_pipelines_torch_gpu: - if: ${{ inputs.job == 'run_pipelines_torch_gpu' }} - name: PyTorch pipelines - needs: check_runners - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: ${{ inputs.docker }} - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - - run_examples_gpu: - if: ${{ inputs.job == 'run_examples_gpu' }} - name: Examples directory - needs: check_runners - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: ${{ inputs.docker }} - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run examples tests on GPU - working-directory: /transformers - run: | - pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports - - run_torch_cuda_extensions_gpu: - if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} - name: Torch ROCm deepspeed tests - needs: check_runners - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: ${{ inputs.docker }} - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - - send_results: - name: Slack Report - needs: [ - check_runner_status, - check_runners, - setup, - run_models_gpu, - run_pipelines_torch_gpu, - run_examples_gpu, - run_torch_cuda_extensions_gpu - ] - if: ${{ always() }} - uses: ./.github/workflows/slack-report.yml - with: - job: ${{ inputs.job }} - # This would be `skipped` if `setup` is skipped. - setup_status: ${{ needs.setup.result }} - slack_report_channel: ${{ inputs.slack_report_channel }} - # This would be an empty string if `setup` is skipped. - folder_slices: ${{ needs.setup.outputs.folder_slices }} - quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} - ci_event: ${{ inputs.ci_event }} - - secrets: inherit From 6054220b77fa92187c6461aa65b1a1ff88222219 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:02:47 +0100 Subject: [PATCH 03/10] Revert deletion of self-push-amd.yml for now --- .../workflows/self-push-amd-mi210-caller.yml | 2 +- .../workflows/self-push-amd-mi250-caller.yml | 2 +- .../workflows/self-push-amd-mi300-caller.yml | 2 +- .github/workflows/self-push-amd.yml | 335 ++++++++++++++++++ 4 files changed, 338 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/self-push-amd.yml diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 5612304389581d..2fb075eb212190 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi210 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main + uses: ./.github/workflows/self-push-amd.yml with: gpu_flavor: mi210 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index ea2801f2c4a4e7..2485a78c33df3c 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi250 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && 
startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main + uses: ./.github/workflows/self-push-amd.yml with: gpu_flavor: mi250 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi300-caller.yml b/.github/workflows/self-push-amd-mi300-caller.yml index 08d9155419465a..a8ee4e540ecf3f 100644 --- a/.github/workflows/self-push-amd-mi300-caller.yml +++ b/.github/workflows/self-push-amd-mi300-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi300 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main + uses: ./.github/workflows/self-push-amd.yml with: gpu_flavor: mi300 secrets: inherit diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml new file mode 100644 index 00000000000000..6931c2f3eadcad --- /dev/null +++ b/.github/workflows/self-push-amd.yml @@ -0,0 +1,335 @@ +name: Self-hosted runner AMD GPU (push) + +on: + workflow_call: + inputs: + gpu_flavor: + required: true + type: string + +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + PYTEST_TIMEOUT: 60 + TF_FORCE_GPU_ALLOW_GROWTH: true + RUN_PT_TF_CROSS_TESTS: 1 + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + +jobs: + check_runner_status: + name: Check Runner Status + runs-on: ubuntu-22.04 + steps: + - name: Checkout transformers + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Check Runner Status + run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + + check_runners: + name: Check Runners + needs: check_runner_status + strategy: + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: ROCM-SMI + run: | + rocm-smi + - name: ROCM-INFO + run: | + rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment + run: | + echo "ROCR: $ROCR_VISIBLE_DEVICES" + + setup_gpu: + name: Setup + needs: check_runners + strategy: + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + test_map: ${{ steps.set-matrix.outputs.test_map }} + env: + # `CI_BRANCH_PUSH`: The branch name from the push event + # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event + # `CI_SHA_PUSH`: The commit SHA from the push event + # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + 
CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} + steps: + # Necessary to get the correct branch name and commit SHA for `workflow_run` event + # We also take into account the `push` event (we might want to test some changes in a branch) + - name: Prepare custom environment variables + shell: bash + # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) + # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) + run: | + CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} + echo $CI_BRANCH_PUSH + echo $CI_BRANCH_WORKFLOW_RUN + echo $CI_SHA_PUSH + echo $CI_SHA_WORKFLOW_RUN + [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV + [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV + + - name: print environment variables + run: | + echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" + echo "env.CI_SHA = ${{ env.CI_SHA }}" + + - name: Update clone using environment variables + working-directory: /transformers + run: | + echo "original branch = $(git branch --show-current)" + git fetch && git checkout ${{ env.CI_BRANCH }} + echo "updated branch = $(git branch --show-current)" + git checkout ${{ env.CI_SHA }} + echo "log = $(git log -n 1)" + + - name: Cleanup + working-directory: /transformers + run: | + rm -rf tests/__pycache__ + rm -rf tests/models/__pycache__ + rm -rf reports + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Fetch the tests to run + working-directory: /transformers + # TODO: add `git-python` in the docker images + run: | + pip install --upgrade git-python + python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt + + - name: Report fetched tests + uses: actions/upload-artifact@v4 + with: + name: test_fetched + path: /transformers/test_preparation.txt + + - id: set-matrix + name: Organize tests into models + working-directory: /transformers + # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc. + # The `test_map` is used to get the actual identified test files under each key. 
+ # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail) + run: | + if [ -f test_map.json ]; then + keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)') + test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)') + else + keys=$(python3 -c 'keys = ["dummy"]; print(keys)') + test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)') + fi + echo $keys + echo $test_map + echo "matrix=$keys" >> $GITHUB_OUTPUT + echo "test_map=$test_map" >> $GITHUB_OUTPUT + + run_models_gpu: + name: Model tests + needs: setup_gpu + # `dummy` means there is no test to run + if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true + strategy: + fail-fast: false + matrix: + folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} + steps: + # Necessary to get the correct branch name and commit SHA for `workflow_run` event + # We also take into account the `push` event (we might want to test some changes in a branch) + - name: Prepare custom environment variables + shell: bash + # For the meaning of these environment variables, see the job `Setup` + run: | + CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} + echo $CI_BRANCH_PUSH + echo $CI_BRANCH_WORKFLOW_RUN + echo $CI_SHA_PUSH + echo $CI_SHA_WORKFLOW_RUN + [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV + [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV + + - name: print environment variables + run: | + echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" + echo "env.CI_SHA = ${{ env.CI_SHA }}" + + - name: Update clone using environment variables + working-directory: /transformers + run: | + echo "original branch = $(git branch --show-current)" + git fetch && git checkout ${{ env.CI_BRANCH }} + echo "updated branch = $(git branch --show-current)" + git checkout ${{ env.CI_SHA }} + echo "log = $(git log -n 1)" + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + + - name: Echo folder ${{ matrix.folders }} + shell: bash + # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to + # set the artifact folder names (because the character `/` is not allowed). 
+ run: | + echo "${{ matrix.folders }}" + echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'models/'/'models_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + + - name: ROCM-SMI + run: | + rocm-smi + - name: ROCM-INFO + run: | + rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment + run: | + echo "ROCR: $ROCR_VISIBLE_DEVICES" + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all non-slow selected tests on GPU + working-directory: /transformers + run: | + python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test" + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + + send_results: + name: Send results to webhook + runs-on: ubuntu-22.04 + if: always() + needs: [ + check_runner_status, + check_runners, + setup_gpu, + run_models_gpu, +# run_tests_torch_cuda_extensions_single_gpu, +# run_tests_torch_cuda_extensions_multi_gpu + ] + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} + steps: + - name: Preliminary job status + shell: bash + # For the meaning of these environment variables, see the job `Setup` + run: | + echo "Runner availability: ${{ needs.check_runner_status.result }}" + echo "Setup status: ${{ needs.setup_gpu.result }}" + echo "Runner status: ${{ needs.check_runners.result }}" + + # Necessary to get the correct branch name and commit SHA for `workflow_run` event + # We also take into account the `push` event (we might want to test some changes in a branch) + - name: Prepare custom environment variables + shell: bash + # For the meaning of these environment variables, see the job `Setup` + run: | + CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} + echo $CI_BRANCH_PUSH + echo $CI_BRANCH_WORKFLOW_RUN + echo $CI_SHA_PUSH + echo $CI_SHA_WORKFLOW_RUN + [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV + [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV + + - name: print environment variables + run: | + echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" + echo "env.CI_SHA = ${{ env.CI_SHA }}" + + - uses: actions/checkout@v4 + # To avoid failure when multiple commits are merged into `main` in a short period of time. 
+ # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ... + # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit) + with: + fetch-depth: 20 + + - name: Update clone using environment variables + run: | + echo "original branch = $(git branch --show-current)" + git fetch && git checkout ${{ env.CI_BRANCH }} + echo "updated branch = $(git branch --show-current)" + git checkout ${{ env.CI_SHA }} + echo "log = $(git log -n 1)" + + - uses: actions/download-artifact@v4 + - name: Send message to Slack + env: + CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} + CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} + CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} + CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} + CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} + CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} + ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} + CI_TITLE_PUSH: ${{ github.event.head_commit.message }} + CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} + CI_SHA: ${{ env.CI_SHA }} + RUNNER_STATUS: ${{ needs.check_runner_status.result }} + RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} + SETUP_STATUS: ${{ needs.setup_gpu.result }} + + # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change + # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. + run: | + pip install huggingface_hub + pip install slack_sdk + pip show slack_sdk + python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" From 76a9fdcbc57781637433c0e5f22abc8f5c25af68 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Mon, 16 Dec 2024 13:54:53 +0100 Subject: [PATCH 04/10] Revert amd push ci changes --- .../workflows/self-push-amd-mi210-caller.yml | 50 +++++++++---------- .../workflows/self-push-amd-mi250-caller.yml | 50 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 2fb075eb212190..a401e40ee7f164 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi210 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi210 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi210 - secrets: inherit +name: Self-hosted runner (AMD mi210 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi210 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || 
((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: ./.github/workflows/self-push-amd.yml + with: + gpu_flavor: mi210 + secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index 2485a78c33df3c..fef532703170cb 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi250 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi250 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi250 - secrets: inherit +name: Self-hosted runner (AMD mi250 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi250 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: ./.github/workflows/self-push-amd.yml + with: + gpu_flavor: mi250 + secrets: inherit From b2d0e283e94690b442bd83642479ac1bb8c33cfc Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Fri, 20 Dec 2024 18:24:33 +0100 Subject: [PATCH 05/10] Add option of specifying result upload repo --- .github/workflows/slack-report.yml | 15 ++++++++++++--- utils/notification_service.py | 23 ++++++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index ee2962ba89c37f..eb3294f392d94f 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -21,6 +21,13 @@ on: ci_event: required: true type: string + report_repo_id: + required: false + type: string + upload_report_summary: + required: false + type: boolean + default: false env: TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} @@ -47,6 +54,8 @@ jobs: CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} + REPORT_REPO_ID: ${{ inputs.report_repo_id }} + UPLOAD_REPORT_SUMMARY: ${{ inputs.upload_report_summary }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} CI_EVENT: ${{ inputs.ci_event }} CI_SHA: ${{ github.sha }} @@ -70,7 +79,7 @@ jobs: with: name: ci_results_${{ inputs.job }} path: ci_results_${{ inputs.job }} - + - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 - name: Send message to Slack for quantization workflow @@ -90,7 +99,7 @@ jobs: pip install huggingface_hub pip install slack_sdk pip show slack_sdk - python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" + python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" # Upload complete failure tables, as 
they might be big and only truncated versions could be sent to Slack. - name: Failure table artifacts @@ -98,4 +107,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: ci_results_${{ inputs.job }} - path: ci_results_${{ inputs.job }} \ No newline at end of file + path: ci_results_${{ inputs.job }} diff --git a/utils/notification_service.py b/utils/notification_service.py index 6c9eab3a85387b..3581fd4fac3f7f 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -108,11 +108,13 @@ def __init__( ci_title: str, model_results: Dict, additional_results: Dict, - selected_warnings: List = None, + repo_id: str = "hf-internal-testing/transformers_daily_ci", + selected_warnings: Union[List, None] = None, prev_ci_artifacts=None, ): self.title = title self.ci_title = ci_title + self.repo_id = repo_id # Failures and success of the modeling tests self.n_model_success = sum(r["success"] for r in model_results.values()) @@ -533,11 +535,11 @@ def payload(self) -> str: commit_info = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=self.repo_id, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) - url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" + url = f"https://huggingface.co/datasets/{self.repo_id}/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" # extra processing to save to json format new_failed_tests = {} @@ -560,7 +562,7 @@ def payload(self) -> str: _ = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=self.repo_id, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -920,6 +922,8 @@ def prepare_reports(title, header, reports, to_truncate=True): if __name__ == "__main__": SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] + REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci") + UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" # runner_status = os.environ.get("RUNNER_STATUS") # runner_env_status = os.environ.get("RUNNER_ENV_STATUS") @@ -1220,7 +1224,8 @@ def prepare_reports(title, header, reports, to_truncate=True): os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main" - is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow + amd_target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-amd-caller.yml@refs/heads/main" + is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") in [target_workflow, amd_target_workflow] # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as # results. 
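For context on what these hunks parameterize, a minimal standalone sketch of the report upload: the target dataset repo comes from REPORT_REPO_ID (falling back to the daily-CI dataset) and the file lands under a date-stamped path. The local file path and job name below are placeholder values, not part of the patch.

import datetime
import os

from huggingface_hub import HfApi

report_repo_id = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci")
job_name = "run_models_gpu"  # placeholder job name
today = datetime.datetime.today().strftime("%Y-%m-%d")

api = HfApi()
api.upload_file(
    path_or_fileobj=f"ci_results_{job_name}/model_results.json",  # placeholder local path
    path_in_repo=f"{today}/ci_results_{job_name}/model_results.json",
    repo_id=report_repo_id,
    repo_type="dataset",
    token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN"),
)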
@@ -1233,7 +1238,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/model_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=REPORT_REPO_ID, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1255,7 +1260,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=REPORT_REPO_ID, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1276,6 +1281,7 @@ def prepare_reports(title, header, reports, to_truncate=True): ci_title, model_results, additional_results, + repo_id=REPORT_REPO_ID, selected_warnings=selected_warnings, prev_ci_artifacts=prev_ci_artifacts, ) @@ -1284,3 +1290,6 @@ def prepare_reports(title, header, reports, to_truncate=True): if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")): message.post() message.post_reply() + + # if UPLOAD_REPORT_SUMMARY + # message.upload_to_repo() From da3448dacf8ae5265d4b005a4e6902dcddb804ad Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:35:47 +0100 Subject: [PATCH 06/10] handle empty string REPORT_REPO_ID correctly --- utils/notification_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/utils/notification_service.py b/utils/notification_service.py index 3581fd4fac3f7f..02cc1194beab8e 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -922,7 +922,9 @@ def prepare_reports(title, header, reports, to_truncate=True): if __name__ == "__main__": SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] - REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci") + REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID") + if not REPORT_REPO_ID: + REPORT_REPO_ID = "hf-internal-testing/transformers_daily_ci" UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" # runner_status = os.environ.get("RUNNER_STATUS") @@ -1290,6 +1292,3 @@ def prepare_reports(title, header, reports, to_truncate=True): if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")): message.post() message.post_reply() - - # if UPLOAD_REPORT_SUMMARY - # message.upload_to_repo() From 0d90a51f726145d40da46b2b9d546085bfb4b6da Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:00:18 +0100 Subject: [PATCH 07/10] Add workflow_id (defaults to Self-hosted runner (scheduled)) --- utils/get_previous_daily_ci.py | 26 ++++++++++++++------------ utils/notification_service.py | 3 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index efd7d24a752991..e75b4896d482d3 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -5,7 +5,14 @@ from get_ci_error_statistics import download_artifact, get_artifacts_links -def get_daily_ci_runs(token, num_runs=7): + +# This is the id of a workflow (not 
of a workflow run). +# From a given workflow run (where we have workflow run id), we can get the workflow id by going to +# https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} +# and check the `workflow_id` key. +DEFAULT_WORKFLOW_ID = "90575235" + +def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): """Get the workflow runs of the scheduled (daily) CI. This only selects the runs triggered by the `schedule` event on the `main` branch. @@ -14,11 +21,6 @@ def get_daily_ci_runs(token, num_runs=7): if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - # The id of a workflow (not of a workflow run). - # From a given workflow run (where we have workflow run id), we can get the workflow id by going to - # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} - # and check the `workflow_id` key. - workflow_id = "90575235" url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs" # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results @@ -29,9 +31,9 @@ def get_daily_ci_runs(token, num_runs=7): return result["workflow_runs"] -def get_last_daily_ci_runs(token): +def get_last_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID): """Get the last completed workflow run id of the scheduled (daily) CI.""" - workflow_runs = get_daily_ci_runs(token) + workflow_runs = get_daily_ci_runs(token, workflow_id) workflow_run_id = None for workflow_run in workflow_runs: if workflow_run["status"] == "completed": @@ -53,9 +55,9 @@ def get_last_daily_ci_run_commit(token): return head_sha -def get_last_daily_ci_artifacts(artifact_names, output_dir, token): +def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): """Get the artifacts of last completed workflow run id of the scheduled (daily) CI.""" - workflow_run_id = get_last_daily_ci_runs(token) + workflow_run_id = get_last_daily_ci_runs(token, workflow_id) if workflow_run_id is not None: artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token) for artifact_name in artifact_names: @@ -66,9 +68,9 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token): ) -def get_last_daily_ci_reports(artifact_names, output_dir, token): +def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI.""" - get_last_daily_ci_artifacts(artifact_names, output_dir, token) + get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id) results = {} for artifact_name in artifact_names: diff --git a/utils/notification_service.py b/utils/notification_service.py index 02cc1194beab8e..d361ffa725ee55 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -926,6 +926,7 @@ def prepare_reports(title, header, reports, to_truncate=True): if not REPORT_REPO_ID: REPORT_REPO_ID = "hf-internal-testing/transformers_daily_ci" UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" + WORKFLOW_ID = "90575235" # runner_status = os.environ.get("RUNNER_STATUS") # runner_env_status = os.environ.get("RUNNER_ENV_STATUS") @@ -1275,7 +1276,7 @@ def prepare_reports(title, header, reports, to_truncate=True): output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) prev_ci_artifacts = 
get_last_daily_ci_reports( - artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"] + artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=WORKFLOW_ID ) message = Message( From 526bb303d245cc053a9da8c8d2252172568aa9b1 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:07:47 +0100 Subject: [PATCH 08/10] Fix call to get_workflow_id. ruff format --- utils/get_previous_daily_ci.py | 27 +++++++++++++++++++++------ utils/notification_service.py | 10 ++++++++-- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index e75b4896d482d3..c46a924ea1c138 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -5,14 +5,30 @@ from get_ci_error_statistics import download_artifact, get_artifacts_links - # This is the id of a workflow (not of a workflow run). # From a given workflow run (where we have workflow run id), we can get the workflow id by going to # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} # and check the `workflow_id` key. DEFAULT_WORKFLOW_ID = "90575235" -def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): + +def get_workflow_id(token, run_id): + """Get the workflow id of the provided run""" + + if run_id is None: + return DEFAULT_WORKFLOW_ID + + headers = None + if token is not None: + headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} + + url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}" + result = requests.get(url, headers=headers).json() + + return result["workflow_id"] + + +def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): """Get the workflow runs of the scheduled (daily) CI. This only selects the runs triggered by the `schedule` event on the `main` branch. 
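A rough usage sketch of the lookup these helpers wrap, assuming a GitHub token and the id of the current workflow run are at hand (both values below are placeholders): the run id resolves to its parent workflow id, which then scopes the query for recent scheduled runs on main.

import requests

token = "ghp_example_token"  # placeholder
run_id = "12345678901"       # placeholder workflow *run* id, e.g. from $GITHUB_RUN_ID
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}

# A workflow run's payload carries the id of the workflow it belongs to.
run = requests.get(
    f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}", headers=headers
).json()
workflow_id = run["workflow_id"]

# List the last few scheduled runs of that workflow on `main`.
url = (
    f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs"
    "?branch=main&event=schedule&exclude_pull_requests=true&per_page=7"
)
scheduled_runs = requests.get(url, headers=headers).json()["workflow_runs"]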
@@ -21,7 +37,6 @@ def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs" # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results url += f"?branch=main&event=schedule&exclude_pull_requests=true&per_page={num_runs}" @@ -31,7 +46,7 @@ def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): return result["workflow_runs"] -def get_last_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID): """Get the last completed workflow run id of the scheduled (daily) CI.""" workflow_runs = get_daily_ci_runs(token, workflow_id) workflow_run_id = None @@ -55,7 +70,7 @@ def get_last_daily_ci_run_commit(token): return head_sha -def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): """Get the artifacts of last completed workflow run id of the scheduled (daily) CI.""" workflow_run_id = get_last_daily_ci_runs(token, workflow_id) if workflow_run_id is not None: @@ -68,7 +83,7 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id = ) -def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI.""" get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id) diff --git a/utils/notification_service.py b/utils/notification_service.py index d361ffa725ee55..d501a24eb02899 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -26,7 +26,7 @@ import requests from get_ci_error_statistics import get_jobs -from get_previous_daily_ci import get_last_daily_ci_reports +from get_previous_daily_ci import get_last_daily_ci_reports, get_workflow_id from huggingface_hub import HfApi from slack_sdk import WebClient @@ -1275,8 +1275,14 @@ def prepare_reports(title, header, reports, to_truncate=True): artifact_names = [f"ci_results_{job_name}"] output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) + workflow_id = None + token = os.environ["ACCESS_REPO_INFO_TOKEN"] + workflow_id = get_workflow_id(token, os.environ["GITHUB_RUN_ID"]) prev_ci_artifacts = get_last_daily_ci_reports( - artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=WORKFLOW_ID + artifact_names=artifact_names, + output_dir=output_dir, + token=token, + workflow_id=workflow_id, ) message = Message( From 19c73cb0b1a7078c3d0d1b77bed405a1c5b55997 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Wed, 15 Jan 2025 13:02:21 +0100 Subject: [PATCH 09/10] Remove redundant variable --- utils/notification_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/notification_service.py b/utils/notification_service.py index d501a24eb02899..301c77d7b201fb 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1275,7 +1275,6 @@ def prepare_reports(title, header, reports, to_truncate=True): 
artifact_names = [f"ci_results_{job_name}"] output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) - workflow_id = None token = os.environ["ACCESS_REPO_INFO_TOKEN"] workflow_id = get_workflow_id(token, os.environ["GITHUB_RUN_ID"]) prev_ci_artifacts = get_last_daily_ci_reports( From 4afffcf9a6196430b8a129d6be8987d7f7f71a6d Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Fri, 17 Jan 2025 20:46:17 +0100 Subject: [PATCH 10/10] Revert some changes that were deemed no longer required --- .github/workflows/slack-report.yml | 9 ------- utils/get_previous_daily_ci.py | 43 +++++++++--------------------- utils/notification_service.py | 32 +++++++--------------- 3 files changed, 22 insertions(+), 62 deletions(-) diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index eb3294f392d94f..cbea37ff567a96 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -21,13 +21,6 @@ on: ci_event: required: true type: string - report_repo_id: - required: false - type: string - upload_report_summary: - required: false - type: boolean - default: false env: TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} @@ -54,8 +47,6 @@ jobs: CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} - REPORT_REPO_ID: ${{ inputs.report_repo_id }} - UPLOAD_REPORT_SUMMARY: ${{ inputs.upload_report_summary }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} CI_EVENT: ${{ inputs.ci_event }} CI_SHA: ${{ github.sha }} diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index c46a924ea1c138..efd7d24a752991 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -5,30 +5,7 @@ from get_ci_error_statistics import download_artifact, get_artifacts_links -# This is the id of a workflow (not of a workflow run). -# From a given workflow run (where we have workflow run id), we can get the workflow id by going to -# https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} -# and check the `workflow_id` key. -DEFAULT_WORKFLOW_ID = "90575235" - - -def get_workflow_id(token, run_id): - """Get the workflow id of the provided run""" - - if run_id is None: - return DEFAULT_WORKFLOW_ID - - headers = None - if token is not None: - headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - - url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}" - result = requests.get(url, headers=headers).json() - - return result["workflow_id"] - - -def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): +def get_daily_ci_runs(token, num_runs=7): """Get the workflow runs of the scheduled (daily) CI. This only selects the runs triggered by the `schedule` event on the `main` branch. @@ -37,6 +14,12 @@ def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} + # The id of a workflow (not of a workflow run). + # From a given workflow run (where we have workflow run id), we can get the workflow id by going to + # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} + # and check the `workflow_id` key. 
+ workflow_id = "90575235" + url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs" # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results url += f"?branch=main&event=schedule&exclude_pull_requests=true&per_page={num_runs}" @@ -46,9 +29,9 @@ def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): return result["workflow_runs"] -def get_last_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_runs(token): """Get the last completed workflow run id of the scheduled (daily) CI.""" - workflow_runs = get_daily_ci_runs(token, workflow_id) + workflow_runs = get_daily_ci_runs(token) workflow_run_id = None for workflow_run in workflow_runs: if workflow_run["status"] == "completed": @@ -70,9 +53,9 @@ def get_last_daily_ci_run_commit(token): return head_sha -def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_artifacts(artifact_names, output_dir, token): """Get the artifacts of last completed workflow run id of the scheduled (daily) CI.""" - workflow_run_id = get_last_daily_ci_runs(token, workflow_id) + workflow_run_id = get_last_daily_ci_runs(token) if workflow_run_id is not None: artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token) for artifact_name in artifact_names: @@ -83,9 +66,9 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id=D ) -def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_reports(artifact_names, output_dir, token): """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI.""" - get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id) + get_last_daily_ci_artifacts(artifact_names, output_dir, token) results = {} for artifact_name in artifact_names: diff --git a/utils/notification_service.py b/utils/notification_service.py index 301c77d7b201fb..6c9eab3a85387b 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -26,7 +26,7 @@ import requests from get_ci_error_statistics import get_jobs -from get_previous_daily_ci import get_last_daily_ci_reports, get_workflow_id +from get_previous_daily_ci import get_last_daily_ci_reports from huggingface_hub import HfApi from slack_sdk import WebClient @@ -108,13 +108,11 @@ def __init__( ci_title: str, model_results: Dict, additional_results: Dict, - repo_id: str = "hf-internal-testing/transformers_daily_ci", - selected_warnings: Union[List, None] = None, + selected_warnings: List = None, prev_ci_artifacts=None, ): self.title = title self.ci_title = ci_title - self.repo_id = repo_id # Failures and success of the modeling tests self.n_model_success = sum(r["success"] for r in model_results.values()) @@ -535,11 +533,11 @@ def payload(self) -> str: commit_info = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt", - repo_id=self.repo_id, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) - url = f"https://huggingface.co/datasets/{self.repo_id}/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" + url = 
f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" # extra processing to save to json format new_failed_tests = {} @@ -562,7 +560,7 @@ def payload(self) -> str: _ = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json", - repo_id=self.repo_id, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -922,11 +920,6 @@ def prepare_reports(title, header, reports, to_truncate=True): if __name__ == "__main__": SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] - REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID") - if not REPORT_REPO_ID: - REPORT_REPO_ID = "hf-internal-testing/transformers_daily_ci" - UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" - WORKFLOW_ID = "90575235" # runner_status = os.environ.get("RUNNER_STATUS") # runner_env_status = os.environ.get("RUNNER_ENV_STATUS") @@ -1227,8 +1220,7 @@ def prepare_reports(title, header, reports, to_truncate=True): os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main" - amd_target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-amd-caller.yml@refs/heads/main" - is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") in [target_workflow, amd_target_workflow] + is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as # results. 
@@ -1241,7 +1233,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/model_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json", - repo_id=REPORT_REPO_ID, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1263,7 +1255,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json", - repo_id=REPORT_REPO_ID, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1275,13 +1267,8 @@ def prepare_reports(title, header, reports, to_truncate=True): artifact_names = [f"ci_results_{job_name}"] output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) - token = os.environ["ACCESS_REPO_INFO_TOKEN"] - workflow_id = get_workflow_id(token, os.environ["GITHUB_RUN_ID"]) prev_ci_artifacts = get_last_daily_ci_reports( - artifact_names=artifact_names, - output_dir=output_dir, - token=token, - workflow_id=workflow_id, + artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"] ) message = Message( @@ -1289,7 +1276,6 @@ def prepare_reports(title, header, reports, to_truncate=True): ci_title, model_results, additional_results, - repo_id=REPORT_REPO_ID, selected_warnings=selected_warnings, prev_ci_artifacts=prev_ci_artifacts, )
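One reasoning step behind patch 06 in this series ("handle empty string REPORT_REPO_ID correctly") is worth spelling out: os.environ.get(name, default) only falls back when the variable is unset, whereas an optional workflow input left blank typically reaches the script as an empty string, so an explicit falsy check is needed. A minimal illustration, assuming nothing beyond the standard library:

import os

os.environ["REPORT_REPO_ID"] = ""  # what an unspecified optional input typically exports

with_default = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci")
assert with_default == ""  # the default is ignored: the variable is set, just empty

repo_id = os.environ.get("REPORT_REPO_ID")
if not repo_id:
    repo_id = "hf-internal-testing/transformers_daily_ci"
assert repo_id == "hf-internal-testing/transformers_daily_ci"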