From d38ed44e9dc3a7b8089a9ad57452c2035a7e3ac7 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:22:04 +0100 Subject: [PATCH 01/10] Use AMD CI workflow defined in hf-workflows --- .../workflows/self-push-amd-mi210-caller.yml | 50 +-- .../workflows/self-push-amd-mi250-caller.yml | 50 +-- .../workflows/self-push-amd-mi300-caller.yml | 2 +- .github/workflows/self-push-amd.yml | 335 ------------------ 4 files changed, 51 insertions(+), 386 deletions(-) delete mode 100644 .github/workflows/self-push-amd.yml diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index a401e40ee7f164..08b73610563089 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi210 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi210 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi210 - secrets: inherit +name: Self-hosted runner (AMD mi210 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi210 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + with: + gpu_flavor: mi210 + secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index fef532703170cb..b83928052cfc9e 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi250 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi250 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi250 - secrets: inherit +name: Self-hosted runner (AMD mi250 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi250 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + 
with: + gpu_flavor: mi250 + secrets: inherit diff --git a/.github/workflows/self-push-amd-mi300-caller.yml b/.github/workflows/self-push-amd-mi300-caller.yml index a8ee4e540ecf3f..cb1a315be7e819 100644 --- a/.github/workflows/self-push-amd-mi300-caller.yml +++ b/.github/workflows/self-push-amd-mi300-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi300 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: ./.github/workflows/self-push-amd.yml + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main with: gpu_flavor: mi300 secrets: inherit diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml deleted file mode 100644 index 6931c2f3eadcad..00000000000000 --- a/.github/workflows/self-push-amd.yml +++ /dev/null @@ -1,335 +0,0 @@ -name: Self-hosted runner AMD GPU (push) - -on: - workflow_call: - inputs: - gpu_flavor: - required: true - type: string - -env: - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - PYTEST_TIMEOUT: 60 - TF_FORCE_GPU_ALLOW_GROWTH: true - RUN_PT_TF_CROSS_TESTS: 1 - HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - -jobs: - check_runner_status: - name: Check Runner Status - runs-on: ubuntu-22.04 - steps: - - name: Checkout transformers - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - - check_runners: - name: Check Runners - needs: check_runner_status - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - setup_gpu: - name: Setup - needs: check_runners - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - matrix: ${{ steps.set-matrix.outputs.matrix }} - test_map: ${{ steps.set-matrix.outputs.test_map }} - env: - # `CI_BRANCH_PUSH`: The branch name from the push event - # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event - # `CI_SHA_PUSH`: The commit SHA from the push event - # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event - CI_BRANCH_PUSH: ${{ github.event.ref }} - CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH: ${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} - steps: - # Necessary to get the correct branch name and commit SHA for 
`workflow_run` event - # We also take into account the `push` event (we might want to test some changes in a branch) - - name: Prepare custom environment variables - shell: bash - # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) - # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) - run: | - CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - echo $CI_BRANCH_PUSH - echo $CI_BRANCH_WORKFLOW_RUN - echo $CI_SHA_PUSH - echo $CI_SHA_WORKFLOW_RUN - [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV - [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV - - - name: print environment variables - run: | - echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" - echo "env.CI_SHA = ${{ env.CI_SHA }}" - - - name: Update clone using environment variables - working-directory: /transformers - run: | - echo "original branch = $(git branch --show-current)" - git fetch && git checkout ${{ env.CI_BRANCH }} - echo "updated branch = $(git branch --show-current)" - git checkout ${{ env.CI_SHA }} - echo "log = $(git log -n 1)" - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Fetch the tests to run - working-directory: /transformers - # TODO: add `git-python` in the docker images - run: | - pip install --upgrade git-python - python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt - - - name: Report fetched tests - uses: actions/upload-artifact@v4 - with: - name: test_fetched - path: /transformers/test_preparation.txt - - - id: set-matrix - name: Organize tests into models - working-directory: /transformers - # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc. - # The `test_map` is used to get the actual identified test files under each key. 
- # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail) - run: | - if [ -f test_map.json ]; then - keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)') - test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)') - else - keys=$(python3 -c 'keys = ["dummy"]; print(keys)') - test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)') - fi - echo $keys - echo $test_map - echo "matrix=$keys" >> $GITHUB_OUTPUT - echo "test_map=$test_map" >> $GITHUB_OUTPUT - - run_models_gpu: - name: Model tests - needs: setup_gpu - # `dummy` means there is no test to run - if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true - strategy: - fail-fast: false - matrix: - folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} - machine_type: [single-gpu, multi-gpu] - runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - env: - # For the meaning of these environment variables, see the job `Setup` - CI_BRANCH_PUSH: ${{ github.event.ref }} - CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH: ${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} - steps: - # Necessary to get the correct branch name and commit SHA for `workflow_run` event - # We also take into account the `push` event (we might want to test some changes in a branch) - - name: Prepare custom environment variables - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - echo $CI_BRANCH_PUSH - echo $CI_BRANCH_WORKFLOW_RUN - echo $CI_SHA_PUSH - echo $CI_SHA_WORKFLOW_RUN - [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV - [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV - - - name: print environment variables - run: | - echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" - echo "env.CI_SHA = ${{ env.CI_SHA }}" - - - name: Update clone using environment variables - working-directory: /transformers - run: | - echo "original branch = $(git branch --show-current)" - git fetch && git checkout ${{ env.CI_BRANCH }} - echo "updated branch = $(git branch --show-current)" - git checkout ${{ env.CI_SHA }} - echo "log = $(git log -n 1)" - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: Echo folder ${{ matrix.folders }} - shell: bash - # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to - # set the artifact folder names (because the character `/` is not allowed). 
- run: | - echo "${{ matrix.folders }}" - echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}" - matrix_folders=${{ matrix.folders }} - matrix_folders=${matrix_folders/'models/'/'models_'} - echo "$matrix_folders" - echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV - - - name: ROCM-SMI - run: | - rocm-smi - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all non-slow selected tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports - - send_results: - name: Send results to webhook - runs-on: ubuntu-22.04 - if: always() - needs: [ - check_runner_status, - check_runners, - setup_gpu, - run_models_gpu, -# run_tests_torch_cuda_extensions_single_gpu, -# run_tests_torch_cuda_extensions_multi_gpu - ] - env: - # For the meaning of these environment variables, see the job `Setup` - CI_BRANCH_PUSH: ${{ github.event.ref }} - CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} - CI_SHA_PUSH: ${{ github.event.head_commit.id }} - CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} - steps: - - name: Preliminary job status - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - echo "Runner availability: ${{ needs.check_runner_status.result }}" - echo "Setup status: ${{ needs.setup_gpu.result }}" - echo "Runner status: ${{ needs.check_runners.result }}" - - # Necessary to get the correct branch name and commit SHA for `workflow_run` event - # We also take into account the `push` event (we might want to test some changes in a branch) - - name: Prepare custom environment variables - shell: bash - # For the meaning of these environment variables, see the job `Setup` - run: | - CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} - echo $CI_BRANCH_PUSH - echo $CI_BRANCH_WORKFLOW_RUN - echo $CI_SHA_PUSH - echo $CI_SHA_WORKFLOW_RUN - [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV - [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV - - - name: print environment variables - run: | - echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" - echo "env.CI_SHA = ${{ env.CI_SHA }}" - - - uses: actions/checkout@v4 - # To avoid failure when multiple commits are merged into `main` in a short period of time. 
- # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ... - # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit) - with: - fetch-depth: 20 - - - name: Update clone using environment variables - run: | - echo "original branch = $(git branch --show-current)" - git fetch && git checkout ${{ env.CI_BRANCH }} - echo "updated branch = $(git branch --show-current)" - git checkout ${{ env.CI_SHA }} - echo "log = $(git log -n 1)" - - - uses: actions/download-artifact@v4 - - name: Send message to Slack - env: - CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} - CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} - CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} - CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} - CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} - ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} - CI_TITLE_PUSH: ${{ github.event.head_commit.message }} - CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} - CI_SHA: ${{ env.CI_SHA }} - RUNNER_STATUS: ${{ needs.check_runner_status.result }} - RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} - SETUP_STATUS: ${{ needs.setup_gpu.result }} - - # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change - # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. - run: | - pip install huggingface_hub - pip install slack_sdk - pip show slack_sdk - python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" From 32b371863f3801c7517c5176dd6fd6ee10c195f7 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:35:23 +0100 Subject: [PATCH 02/10] Use hf-workflows for both push and scheduled AMD CI --- .../workflows/self-push-amd-mi210-caller.yml | 2 +- .../workflows/self-push-amd-mi250-caller.yml | 2 +- .../workflows/self-push-amd-mi300-caller.yml | 2 +- .../self-scheduled-amd-mi210-caller.yml | 110 +++--- .../self-scheduled-amd-mi250-caller.yml | 110 +++--- .github/workflows/self-scheduled-amd.yml | 349 ------------------ 6 files changed, 113 insertions(+), 462 deletions(-) delete mode 100644 .github/workflows/self-scheduled-amd.yml diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 08b73610563089..5612304389581d 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi210 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main with: gpu_flavor: mi210 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index b83928052cfc9e..ea2801f2c4a4e7 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi250 if: (cancelled() != true) 
&& ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main with: gpu_flavor: mi250 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi300-caller.yml b/.github/workflows/self-push-amd-mi300-caller.yml index cb1a315be7e819..08d9155419465a 100644 --- a/.github/workflows/self-push-amd-mi300-caller.yml +++ b/.github/workflows/self-push-amd-mi300-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi300 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci.yaml@main + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main with: gpu_flavor: mi300 secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi210-caller.yml b/.github/workflows/self-scheduled-amd-mi210-caller.yml index 1c79b38a314e0b..6109faca00932e 100644 --- a/.github/workflows/self-scheduled-amd-mi210-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi210-caller.yml @@ -1,55 +1,55 @@ -name: Self-hosted runner (AMD mi210 scheduled CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (AMD scheduled CI caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_scheduled_ci_caller* - -jobs: - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit - - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit - - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit - - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi210 - docker: huggingface/transformers-pytorch-deepspeed-amd-gpu - ci_event: Scheduled CI (AMD) - mi210 - secrets: inherit +name: Self-hosted runner (AMD mi210 scheduled CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (AMD scheduled CI caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_scheduled_ci_caller* + +jobs: + model-ci: + name: Model CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: 
huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_pipelines_torch_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + example-ci: + name: Example CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi210 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi210 + secrets: inherit diff --git a/.github/workflows/self-scheduled-amd-mi250-caller.yml b/.github/workflows/self-scheduled-amd-mi250-caller.yml index fd151305716396..a33b6e579c0ef3 100644 --- a/.github/workflows/self-scheduled-amd-mi250-caller.yml +++ b/.github/workflows/self-scheduled-amd-mi250-caller.yml @@ -1,55 +1,55 @@ -name: Self-hosted runner (AMD mi250 scheduled CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (AMD scheduled CI caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_scheduled_ci_caller* - -jobs: - model-ci: - name: Model CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_models_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit - - torch-pipeline: - name: Torch pipeline CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_pipelines_torch_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit - - example-ci: - name: Example CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_examples_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit - - deepspeed-ci: - name: DeepSpeed CI - uses: ./.github/workflows/self-scheduled-amd.yml - with: - job: run_torch_cuda_extensions_gpu - slack_report_channel: "#transformers-ci-daily-amd" - runner: mi250 - docker: huggingface/transformers-pytorch-deepspeed-amd-gpu - ci_event: Scheduled CI (AMD) - mi250 - secrets: inherit +name: Self-hosted runner (AMD mi250 scheduled CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (AMD scheduled CI caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_scheduled_ci_caller* + +jobs: + model-ci: + name: Model CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_models_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + torch-pipeline: + name: Torch pipeline CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_pipelines_torch_gpu + slack_report_channel: 
"#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + example-ci: + name: Example CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_examples_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit + + deepspeed-ci: + name: DeepSpeed CI + uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_scheduled.yaml@main + with: + job: run_torch_cuda_extensions_gpu + slack_report_channel: "#transformers-ci-daily-amd" + runner: mi250 + docker: huggingface/transformers-pytorch-deepspeed-amd-gpu + ci_event: Scheduled CI (AMD) - mi250 + secrets: inherit diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml deleted file mode 100644 index 47f92cd6a2b086..00000000000000 --- a/.github/workflows/self-scheduled-amd.yml +++ /dev/null @@ -1,349 +0,0 @@ -name: Self-hosted runner (scheduled-amd) - -# Note: For the AMD CI, we rely on a caller workflow and on the workflow_call event to trigger the -# CI in order to run it on both MI210 and MI250, without having to use matrix here which pushes -# us towards the limit of allowed jobs on GitHub Actions. - -on: - workflow_call: - inputs: - job: - required: true - type: string - slack_report_channel: - required: true - type: string - runner: - required: true - type: string - docker: - required: true - type: string - ci_event: - required: true - type: string - -env: - HF_HOME: /mnt/cache - TRANSFORMERS_IS_CI: yes - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - RUN_SLOW: yes - HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} - NUM_SLICES: 2 - -# Important note: each job (run_tests_single_gpu, run_tests_multi_gpu, run_examples_gpu, run_pipelines_torch_gpu) requires all the previous jobs before running. -# This is done so that we avoid parallelizing the scheduled tests, to leave available -# runners for the push CI that is running on the same machine. 
-jobs: - check_runner_status: - name: Check Runner Status - runs-on: ubuntu-22.04 - steps: - - name: Checkout transformers - uses: actions/checkout@v4 - with: - fetch-depth: 2 - - - name: Check Runner Status - run: python utils/check_self_hosted_runner.py --target_runners hf-amd-mi210-ci-1gpu-1,hf-amd-mi250-ci-1gpu-1,hf-amd-mi300-ci-1gpu-1 --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} - - check_runners: - name: Check Runners - needs: check_runner_status - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - setup: - if: contains(fromJSON('["run_models_gpu"]'), inputs.job) - name: Setup - needs: check_runners - strategy: - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: huggingface/transformers-pytorch-amd-gpu - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - outputs: - folder_slices: ${{ steps.set-matrix.outputs.folder_slices }} - slice_ids: ${{ steps.set-matrix.outputs.slice_ids }} - steps: - - name: Update clone - working-directory: /transformers - run: | - git fetch && git checkout ${{ github.sha }} - - - name: Cleanup - working-directory: /transformers - run: | - rm -rf tests/__pycache__ - rm -rf tests/models/__pycache__ - rm -rf reports - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - id: set-matrix - name: Identify models to test - working-directory: /transformers/tests - run: | - echo "folder_slices=$(python3 ../utils/split_model_tests.py --num_splits ${{ env.NUM_SLICES }})" >> $GITHUB_OUTPUT - echo "slice_ids=$(python3 -c 'd = list(range(${{ env.NUM_SLICES }})); print(d)')" >> $GITHUB_OUTPUT - - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - run_models_gpu: - if: ${{ inputs.job == 'run_models_gpu' }} - name: Single GPU tests - needs: setup - strategy: - max-parallel: 1 # For now, not to parallelize. Can change later if it works well. 
- fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - slice_id: ${{ fromJSON(needs.setup.outputs.slice_ids) }} - uses: ./.github/workflows/model_jobs_amd.yml - with: - folder_slices: ${{ needs.setup.outputs.folder_slices }} - machine_type: ${{ matrix.machine_type }} - slice_id: ${{ matrix.slice_id }} - runner: ${{ inputs.runner }} - docker: ${{ inputs.docker }} - secrets: inherit - - run_pipelines_torch_gpu: - if: ${{ inputs.job == 'run_pipelines_torch_gpu' }} - name: PyTorch pipelines - needs: check_runners - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: ${{ inputs.docker }} - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . - - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all pipeline tests on GPU - working-directory: /transformers - run: | - python3 -m pytest -n 1 -v --dist=loadfile --make-reports=${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports tests/pipelines -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_pipelines_torch_gpu_test_reports - - run_examples_gpu: - if: ${{ inputs.job == 'run_examples_gpu' }} - name: Examples directory - needs: check_runners - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: ${{ inputs.docker }} - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run examples tests on GPU - working-directory: /transformers - run: | - pip install -r examples/pytorch/_tests_requirements.txt - python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_examples_gpu_test_reports examples/pytorch -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_examples_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_examples_gpu_test_reports - - run_torch_cuda_extensions_gpu: - if: ${{ inputs.job == 'run_torch_cuda_extensions_gpu' }} - name: Torch ROCm deepspeed tests - needs: check_runners - strategy: - fail-fast: false - matrix: - machine_type: [single-gpu, multi-gpu] - runs-on: ['${{ matrix.machine_type }}', self-hosted, amd-gpu, '${{ inputs.runner }}'] - container: - image: ${{ inputs.docker }} - options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ - steps: - - name: Update clone - working-directory: /transformers - run: git fetch && git checkout ${{ github.sha }} - - - name: Reinstall transformers in edit mode (remove the one installed during docker image build) - working-directory: /transformers - run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . 
- - - name: ROCM-SMI - run: | - rocm-smi - - - name: ROCM-INFO - run: | - rocminfo | grep "Agent" -A 14 - - - name: Show ROCR environment - run: | - echo "ROCR: $ROCR_VISIBLE_DEVICES" - - - name: Environment - working-directory: /transformers - run: | - python3 utils/print_env.py - - - name: Show installed libraries and their versions - working-directory: /transformers - run: pip freeze - - - name: Run all tests on GPU - working-directory: /transformers - run: python3 -m pytest -v --make-reports=${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports tests/deepspeed tests/extended -m "not not_device_test" - - - name: Failure short reports - if: ${{ failure() }} - continue-on-error: true - run: cat /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports/failures_short.txt - - - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports" - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - path: /transformers/reports/${{ matrix.machine_type }}_run_torch_cuda_extensions_gpu_test_reports - - send_results: - name: Slack Report - needs: [ - check_runner_status, - check_runners, - setup, - run_models_gpu, - run_pipelines_torch_gpu, - run_examples_gpu, - run_torch_cuda_extensions_gpu - ] - if: ${{ always() }} - uses: ./.github/workflows/slack-report.yml - with: - job: ${{ inputs.job }} - # This would be `skipped` if `setup` is skipped. - setup_status: ${{ needs.setup.result }} - slack_report_channel: ${{ inputs.slack_report_channel }} - # This would be an empty string if `setup` is skipped. - folder_slices: ${{ needs.setup.outputs.folder_slices }} - quantization_matrix: ${{ needs.setup.outputs.quantization_matrix }} - ci_event: ${{ inputs.ci_event }} - - secrets: inherit From 6054220b77fa92187c6461aa65b1a1ff88222219 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:02:47 +0100 Subject: [PATCH 03/10] Revert deletion of self-push-amd.yml for now --- .../workflows/self-push-amd-mi210-caller.yml | 2 +- .../workflows/self-push-amd-mi250-caller.yml | 2 +- .../workflows/self-push-amd-mi300-caller.yml | 2 +- .github/workflows/self-push-amd.yml | 335 ++++++++++++++++++ 4 files changed, 338 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/self-push-amd.yml diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 5612304389581d..2fb075eb212190 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi210 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main + uses: ./.github/workflows/self-push-amd.yml with: gpu_flavor: mi210 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index ea2801f2c4a4e7..2485a78c33df3c 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi250 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && 
startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main + uses: ./.github/workflows/self-push-amd.yml with: gpu_flavor: mi250 secrets: inherit diff --git a/.github/workflows/self-push-amd-mi300-caller.yml b/.github/workflows/self-push-amd-mi300-caller.yml index 08d9155419465a..a8ee4e540ecf3f 100644 --- a/.github/workflows/self-push-amd-mi300-caller.yml +++ b/.github/workflows/self-push-amd-mi300-caller.yml @@ -19,7 +19,7 @@ jobs: run_amd_ci: name: AMD mi300 if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && (startsWith(github.ref_name, 'run_amd_push_ci_caller') || startsWith(github.ref_name, 'mi300-ci')))) - uses: huggingface/hf-workflows/.github/workflows/transformers_amd_ci_push.yaml@main + uses: ./.github/workflows/self-push-amd.yml with: gpu_flavor: mi300 secrets: inherit diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml new file mode 100644 index 00000000000000..6931c2f3eadcad --- /dev/null +++ b/.github/workflows/self-push-amd.yml @@ -0,0 +1,335 @@ +name: Self-hosted runner AMD GPU (push) + +on: + workflow_call: + inputs: + gpu_flavor: + required: true + type: string + +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + PYTEST_TIMEOUT: 60 + TF_FORCE_GPU_ALLOW_GROWTH: true + RUN_PT_TF_CROSS_TESTS: 1 + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + +jobs: + check_runner_status: + name: Check Runner Status + runs-on: ubuntu-22.04 + steps: + - name: Checkout transformers + uses: actions/checkout@v4 + with: + fetch-depth: 2 + + - name: Check Runner Status + run: python utils/check_self_hosted_runner.py --target_runners amd-mi210-single-gpu-ci-runner-docker --token ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + + check_runners: + name: Check Runners + needs: check_runner_status + strategy: + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + steps: + - name: ROCM-SMI + run: | + rocm-smi + - name: ROCM-INFO + run: | + rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment + run: | + echo "ROCR: $ROCR_VISIBLE_DEVICES" + + setup_gpu: + name: Setup + needs: check_runners + strategy: + matrix: + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + test_map: ${{ steps.set-matrix.outputs.test_map }} + env: + # `CI_BRANCH_PUSH`: The branch name from the push event + # `CI_BRANCH_WORKFLOW_RUN`: The name of the branch on which this workflow is triggered by `workflow_run` event + # `CI_SHA_PUSH`: The commit SHA from the push event + # `CI_SHA_WORKFLOW_RUN`: The commit SHA that triggers this workflow by `workflow_run` event + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + 
CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} + steps: + # Necessary to get the correct branch name and commit SHA for `workflow_run` event + # We also take into account the `push` event (we might want to test some changes in a branch) + - name: Prepare custom environment variables + shell: bash + # `CI_BRANCH`: The non-empty branch name from the above two (one and only one of them is empty) + # `CI_SHA`: The non-empty commit SHA from the above two (one and only one of them is empty) + run: | + CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} + echo $CI_BRANCH_PUSH + echo $CI_BRANCH_WORKFLOW_RUN + echo $CI_SHA_PUSH + echo $CI_SHA_WORKFLOW_RUN + [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV + [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV + + - name: print environment variables + run: | + echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" + echo "env.CI_SHA = ${{ env.CI_SHA }}" + + - name: Update clone using environment variables + working-directory: /transformers + run: | + echo "original branch = $(git branch --show-current)" + git fetch && git checkout ${{ env.CI_BRANCH }} + echo "updated branch = $(git branch --show-current)" + git checkout ${{ env.CI_SHA }} + echo "log = $(git log -n 1)" + + - name: Cleanup + working-directory: /transformers + run: | + rm -rf tests/__pycache__ + rm -rf tests/models/__pycache__ + rm -rf reports + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Fetch the tests to run + working-directory: /transformers + # TODO: add `git-python` in the docker images + run: | + pip install --upgrade git-python + python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt + + - name: Report fetched tests + uses: actions/upload-artifact@v4 + with: + name: test_fetched + path: /transformers/test_preparation.txt + + - id: set-matrix + name: Organize tests into models + working-directory: /transformers + # The `keys` is used as GitHub actions matrix for jobs, i.e. `models/bert`, `tokenization`, `pipeline`, etc. + # The `test_map` is used to get the actual identified test files under each key. 
+ # If no test to run (so no `test_map.json` file), create a dummy map (empty matrix will fail) + run: | + if [ -f test_map.json ]; then + keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)') + test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)') + else + keys=$(python3 -c 'keys = ["dummy"]; print(keys)') + test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)') + fi + echo $keys + echo $test_map + echo "matrix=$keys" >> $GITHUB_OUTPUT + echo "test_map=$test_map" >> $GITHUB_OUTPUT + + run_models_gpu: + name: Model tests + needs: setup_gpu + # `dummy` means there is no test to run + if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true + strategy: + fail-fast: false + matrix: + folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }} + machine_type: [single-gpu, multi-gpu] + runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}'] + container: + image: huggingface/transformers-pytorch-amd-gpu-push-ci # <--- We test only for PyTorch for now + options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} + steps: + # Necessary to get the correct branch name and commit SHA for `workflow_run` event + # We also take into account the `push` event (we might want to test some changes in a branch) + - name: Prepare custom environment variables + shell: bash + # For the meaning of these environment variables, see the job `Setup` + run: | + CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} + echo $CI_BRANCH_PUSH + echo $CI_BRANCH_WORKFLOW_RUN + echo $CI_SHA_PUSH + echo $CI_SHA_WORKFLOW_RUN + [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV + [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV + + - name: print environment variables + run: | + echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" + echo "env.CI_SHA = ${{ env.CI_SHA }}" + + - name: Update clone using environment variables + working-directory: /transformers + run: | + echo "original branch = $(git branch --show-current)" + git fetch && git checkout ${{ env.CI_BRANCH }} + echo "updated branch = $(git branch --show-current)" + git checkout ${{ env.CI_SHA }} + echo "log = $(git log -n 1)" + + - name: Reinstall transformers in edit mode (remove the one installed during docker image build) + working-directory: /transformers + run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . + + - name: Echo folder ${{ matrix.folders }} + shell: bash + # For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to + # set the artifact folder names (because the character `/` is not allowed). 
+ run: | + echo "${{ matrix.folders }}" + echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}" + matrix_folders=${{ matrix.folders }} + matrix_folders=${matrix_folders/'models/'/'models_'} + echo "$matrix_folders" + echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV + + - name: ROCM-SMI + run: | + rocm-smi + - name: ROCM-INFO + run: | + rocminfo | grep "Agent" -A 14 + - name: Show ROCR environment + run: | + echo "ROCR: $ROCR_VISIBLE_DEVICES" + + - name: Environment + working-directory: /transformers + run: | + python3 utils/print_env.py + + - name: Show installed libraries and their versions + working-directory: /transformers + run: pip freeze + + - name: Run all non-slow selected tests on GPU + working-directory: /transformers + run: | + python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test" + + - name: Failure short reports + if: ${{ failure() }} + continue-on-error: true + run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt + + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports + path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports + + send_results: + name: Send results to webhook + runs-on: ubuntu-22.04 + if: always() + needs: [ + check_runner_status, + check_runners, + setup_gpu, + run_models_gpu, +# run_tests_torch_cuda_extensions_single_gpu, +# run_tests_torch_cuda_extensions_multi_gpu + ] + env: + # For the meaning of these environment variables, see the job `Setup` + CI_BRANCH_PUSH: ${{ github.event.ref }} + CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }} + CI_SHA_PUSH: ${{ github.event.head_commit.id }} + CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }} + steps: + - name: Preliminary job status + shell: bash + # For the meaning of these environment variables, see the job `Setup` + run: | + echo "Runner availability: ${{ needs.check_runner_status.result }}" + echo "Setup status: ${{ needs.setup_gpu.result }}" + echo "Runner status: ${{ needs.check_runners.result }}" + + # Necessary to get the correct branch name and commit SHA for `workflow_run` event + # We also take into account the `push` event (we might want to test some changes in a branch) + - name: Prepare custom environment variables + shell: bash + # For the meaning of these environment variables, see the job `Setup` + run: | + CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''} + echo $CI_BRANCH_PUSH + echo $CI_BRANCH_WORKFLOW_RUN + echo $CI_SHA_PUSH + echo $CI_SHA_WORKFLOW_RUN + [[ ! -z "$CI_BRANCH_PUSH" ]] && echo "CI_BRANCH=$CI_BRANCH_PUSH" >> $GITHUB_ENV || echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> $GITHUB_ENV + [[ ! -z "$CI_SHA_PUSH" ]] && echo "CI_SHA=$CI_SHA_PUSH" >> $GITHUB_ENV || echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> $GITHUB_ENV + + - name: print environment variables + run: | + echo "env.CI_BRANCH = ${{ env.CI_BRANCH }}" + echo "env.CI_SHA = ${{ env.CI_SHA }}" + + - uses: actions/checkout@v4 + # To avoid failure when multiple commits are merged into `main` in a short period of time. 
+ # Checking out to an old commit beyond the fetch depth will get an error `fatal: reference is not a tree: ... + # (Only required for `workflow_run` event, where we get the latest HEAD on `main` instead of the event commit) + with: + fetch-depth: 20 + + - name: Update clone using environment variables + run: | + echo "original branch = $(git branch --show-current)" + git fetch && git checkout ${{ env.CI_BRANCH }} + echo "updated branch = $(git branch --show-current)" + git checkout ${{ env.CI_SHA }} + echo "log = $(git log -n 1)" + + - uses: actions/download-artifact@v4 + - name: Send message to Slack + env: + CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }} + CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }} + CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} + CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} + CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} + CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }} + ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} + CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }} + CI_TITLE_PUSH: ${{ github.event.head_commit.message }} + CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }} + CI_SHA: ${{ env.CI_SHA }} + RUNNER_STATUS: ${{ needs.check_runner_status.result }} + RUNNER_ENV_STATUS: ${{ needs.check_runners.result }} + SETUP_STATUS: ${{ needs.setup_gpu.result }} + + # We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change + # `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`. + run: | + pip install huggingface_hub + pip install slack_sdk + pip show slack_sdk + python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}" From 76a9fdcbc57781637433c0e5f22abc8f5c25af68 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Mon, 16 Dec 2024 13:54:53 +0100 Subject: [PATCH 04/10] Revert amd push ci changes --- .../workflows/self-push-amd-mi210-caller.yml | 50 +++++++++---------- .../workflows/self-push-amd-mi250-caller.yml | 50 +++++++++---------- 2 files changed, 50 insertions(+), 50 deletions(-) diff --git a/.github/workflows/self-push-amd-mi210-caller.yml b/.github/workflows/self-push-amd-mi210-caller.yml index 2fb075eb212190..a401e40ee7f164 100644 --- a/.github/workflows/self-push-amd-mi210-caller.yml +++ b/.github/workflows/self-push-amd-mi210-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi210 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi210 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi210 - secrets: inherit +name: Self-hosted runner (AMD mi210 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi210 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || 
((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: ./.github/workflows/self-push-amd.yml + with: + gpu_flavor: mi210 + secrets: inherit diff --git a/.github/workflows/self-push-amd-mi250-caller.yml b/.github/workflows/self-push-amd-mi250-caller.yml index 2485a78c33df3c..fef532703170cb 100644 --- a/.github/workflows/self-push-amd-mi250-caller.yml +++ b/.github/workflows/self-push-amd-mi250-caller.yml @@ -1,25 +1,25 @@ -name: Self-hosted runner (AMD mi250 CI caller) - -on: - workflow_run: - workflows: ["Self-hosted runner (push-caller)"] - branches: ["main"] - types: [completed] - push: - branches: - - run_amd_push_ci_caller* - paths: - - "src/**" - - "tests/**" - - ".github/**" - - "templates/**" - - "utils/**" - -jobs: - run_amd_ci: - name: AMD mi250 - if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) - uses: ./.github/workflows/self-push-amd.yml - with: - gpu_flavor: mi250 - secrets: inherit +name: Self-hosted runner (AMD mi250 CI caller) + +on: + workflow_run: + workflows: ["Self-hosted runner (push-caller)"] + branches: ["main"] + types: [completed] + push: + branches: + - run_amd_push_ci_caller* + paths: + - "src/**" + - "tests/**" + - ".github/**" + - "templates/**" + - "utils/**" + +jobs: + run_amd_ci: + name: AMD mi250 + if: (cancelled() != true) && ((github.event_name == 'workflow_run') || ((github.event_name == 'push') && startsWith(github.ref_name, 'run_amd_push_ci_caller'))) + uses: ./.github/workflows/self-push-amd.yml + with: + gpu_flavor: mi250 + secrets: inherit From b2d0e283e94690b442bd83642479ac1bb8c33cfc Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Fri, 20 Dec 2024 18:24:33 +0100 Subject: [PATCH 05/10] Add option of specifying result upload repo --- .github/workflows/slack-report.yml | 15 ++++++++++++--- utils/notification_service.py | 23 ++++++++++++++++------- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index ee2962ba89c37f..eb3294f392d94f 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -21,6 +21,13 @@ on: ci_event: required: true type: string + report_repo_id: + required: false + type: string + upload_report_summary: + required: false + type: boolean + default: false env: TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} @@ -47,6 +54,8 @@ jobs: CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} + REPORT_REPO_ID: ${{ inputs.report_repo_id }} + UPLOAD_REPORT_SUMMARY: ${{ inputs.upload_report_summary }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} CI_EVENT: ${{ inputs.ci_event }} CI_SHA: ${{ github.sha }} @@ -70,7 +79,7 @@ jobs: with: name: ci_results_${{ inputs.job }} path: ci_results_${{ inputs.job }} - + - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 - name: Send message to Slack for quantization workflow @@ -90,7 +99,7 @@ jobs: pip install huggingface_hub pip install slack_sdk pip show slack_sdk - python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" + python utils/notification_service_quantization.py "${{ inputs.quantization_matrix }}" # Upload complete failure tables, as 
they might be big and only truncated versions could be sent to Slack. - name: Failure table artifacts @@ -98,4 +107,4 @@ jobs: uses: actions/upload-artifact@v4 with: name: ci_results_${{ inputs.job }} - path: ci_results_${{ inputs.job }} \ No newline at end of file + path: ci_results_${{ inputs.job }} diff --git a/utils/notification_service.py b/utils/notification_service.py index 6c9eab3a85387b..3581fd4fac3f7f 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -108,11 +108,13 @@ def __init__( ci_title: str, model_results: Dict, additional_results: Dict, - selected_warnings: List = None, + repo_id: str = "hf-internal-testing/transformers_daily_ci", + selected_warnings: Union[List, None] = None, prev_ci_artifacts=None, ): self.title = title self.ci_title = ci_title + self.repo_id = repo_id # Failures and success of the modeling tests self.n_model_success = sum(r["success"] for r in model_results.values()) @@ -533,11 +535,11 @@ def payload(self) -> str: commit_info = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=self.repo_id, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) - url = f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" + url = f"https://huggingface.co/datasets/{self.repo_id}/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" # extra processing to save to json format new_failed_tests = {} @@ -560,7 +562,7 @@ def payload(self) -> str: _ = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=self.repo_id, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -920,6 +922,8 @@ def prepare_reports(title, header, reports, to_truncate=True): if __name__ == "__main__": SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] + REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci") + UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" # runner_status = os.environ.get("RUNNER_STATUS") # runner_env_status = os.environ.get("RUNNER_ENV_STATUS") @@ -1220,7 +1224,8 @@ def prepare_reports(title, header, reports, to_truncate=True): os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main" - is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow + amd_target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-amd-caller.yml@refs/heads/main" + is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") in [target_workflow, amd_target_workflow] # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as # results. 
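For context on what these hunks parameterize, a minimal standalone sketch of the report upload: the target dataset repo comes from REPORT_REPO_ID (falling back to the daily-CI dataset) and the file lands under a date-stamped path. The local file path and job name below are placeholder values, not part of the patch.

import datetime
import os

from huggingface_hub import HfApi

report_repo_id = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci")
job_name = "run_models_gpu"  # placeholder job name
today = datetime.datetime.today().strftime("%Y-%m-%d")

api = HfApi()
api.upload_file(
    path_or_fileobj=f"ci_results_{job_name}/model_results.json",  # placeholder local path
    path_in_repo=f"{today}/ci_results_{job_name}/model_results.json",
    repo_id=report_repo_id,
    repo_type="dataset",
    token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN"),
)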
@@ -1233,7 +1238,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/model_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=REPORT_REPO_ID, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1255,7 +1260,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json", - repo_id="hf-internal-testing/transformers_daily_ci", + repo_id=REPORT_REPO_ID, repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1276,6 +1281,7 @@ def prepare_reports(title, header, reports, to_truncate=True): ci_title, model_results, additional_results, + repo_id=REPORT_REPO_ID, selected_warnings=selected_warnings, prev_ci_artifacts=prev_ci_artifacts, ) @@ -1284,3 +1290,6 @@ def prepare_reports(title, header, reports, to_truncate=True): if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")): message.post() message.post_reply() + + # if UPLOAD_REPORT_SUMMARY + # message.upload_to_repo() From da3448dacf8ae5265d4b005a4e6902dcddb804ad Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:35:47 +0100 Subject: [PATCH 06/10] handle empty string REPORT_REPO_ID correctly --- utils/notification_service.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/utils/notification_service.py b/utils/notification_service.py index 3581fd4fac3f7f..02cc1194beab8e 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -922,7 +922,9 @@ def prepare_reports(title, header, reports, to_truncate=True): if __name__ == "__main__": SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] - REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci") + REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID") + if not REPORT_REPO_ID: + REPORT_REPO_ID = "hf-internal-testing/transformers_daily_ci" UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" # runner_status = os.environ.get("RUNNER_STATUS") @@ -1290,6 +1292,3 @@ def prepare_reports(title, header, reports, to_truncate=True): if message.n_failures or (ci_event != "push" and not ci_event.startswith("Push CI (AMD)")): message.post() message.post_reply() - - # if UPLOAD_REPORT_SUMMARY - # message.upload_to_repo() From 0d90a51f726145d40da46b2b9d546085bfb4b6da Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:00:18 +0100 Subject: [PATCH 07/10] Add workflow_id (defaults to Self-hosted runner (scheduled)) --- utils/get_previous_daily_ci.py | 26 ++++++++++++++------------ utils/notification_service.py | 3 ++- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index efd7d24a752991..e75b4896d482d3 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -5,7 +5,14 @@ from get_ci_error_statistics import download_artifact, get_artifacts_links -def get_daily_ci_runs(token, num_runs=7): + +# This is the id of a workflow (not 
of a workflow run). +# From a given workflow run (where we have workflow run id), we can get the workflow id by going to +# https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} +# and check the `workflow_id` key. +DEFAULT_WORKFLOW_ID = "90575235" + +def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): """Get the workflow runs of the scheduled (daily) CI. This only selects the runs triggered by the `schedule` event on the `main` branch. @@ -14,11 +21,6 @@ def get_daily_ci_runs(token, num_runs=7): if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - # The id of a workflow (not of a workflow run). - # From a given workflow run (where we have workflow run id), we can get the workflow id by going to - # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} - # and check the `workflow_id` key. - workflow_id = "90575235" url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs" # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results @@ -29,9 +31,9 @@ def get_daily_ci_runs(token, num_runs=7): return result["workflow_runs"] -def get_last_daily_ci_runs(token): +def get_last_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID): """Get the last completed workflow run id of the scheduled (daily) CI.""" - workflow_runs = get_daily_ci_runs(token) + workflow_runs = get_daily_ci_runs(token, workflow_id) workflow_run_id = None for workflow_run in workflow_runs: if workflow_run["status"] == "completed": @@ -53,9 +55,9 @@ def get_last_daily_ci_run_commit(token): return head_sha -def get_last_daily_ci_artifacts(artifact_names, output_dir, token): +def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): """Get the artifacts of last completed workflow run id of the scheduled (daily) CI.""" - workflow_run_id = get_last_daily_ci_runs(token) + workflow_run_id = get_last_daily_ci_runs(token, workflow_id) if workflow_run_id is not None: artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token) for artifact_name in artifact_names: @@ -66,9 +68,9 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token): ) -def get_last_daily_ci_reports(artifact_names, output_dir, token): +def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI.""" - get_last_daily_ci_artifacts(artifact_names, output_dir, token) + get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id) results = {} for artifact_name in artifact_names: diff --git a/utils/notification_service.py b/utils/notification_service.py index 02cc1194beab8e..d361ffa725ee55 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -926,6 +926,7 @@ def prepare_reports(title, header, reports, to_truncate=True): if not REPORT_REPO_ID: REPORT_REPO_ID = "hf-internal-testing/transformers_daily_ci" UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" + WORKFLOW_ID = "90575235" # runner_status = os.environ.get("RUNNER_STATUS") # runner_env_status = os.environ.get("RUNNER_ENV_STATUS") @@ -1275,7 +1276,7 @@ def prepare_reports(title, header, reports, to_truncate=True): output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) prev_ci_artifacts = 
get_last_daily_ci_reports( - artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"] + artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=WORKFLOW_ID ) message = Message( From 526bb303d245cc053a9da8c8d2252172568aa9b1 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:07:47 +0100 Subject: [PATCH 08/10] Fix call to get_workflow_id. ruff format --- utils/get_previous_daily_ci.py | 27 +++++++++++++++++++++------ utils/notification_service.py | 10 ++++++++-- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index e75b4896d482d3..c46a924ea1c138 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -5,14 +5,30 @@ from get_ci_error_statistics import download_artifact, get_artifacts_links - # This is the id of a workflow (not of a workflow run). # From a given workflow run (where we have workflow run id), we can get the workflow id by going to # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} # and check the `workflow_id` key. DEFAULT_WORKFLOW_ID = "90575235" -def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): + +def get_workflow_id(token, run_id): + """Get the workflow id of the provided run""" + + if run_id is None: + return DEFAULT_WORKFLOW_ID + + headers = None + if token is not None: + headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} + + url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}" + result = requests.get(url, headers=headers).json() + + return result["workflow_id"] + + +def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): """Get the workflow runs of the scheduled (daily) CI. This only selects the runs triggered by the `schedule` event on the `main` branch. 
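A rough usage sketch of the lookup these helpers wrap, assuming a GitHub token and the id of the current workflow run are at hand (both values below are placeholders): the run id resolves to its parent workflow id, which then scopes the query for recent scheduled runs on main.

import requests

token = "ghp_example_token"  # placeholder
run_id = "12345678901"       # placeholder workflow *run* id, e.g. from $GITHUB_RUN_ID
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}

# A workflow run's payload carries the id of the workflow it belongs to.
run = requests.get(
    f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}", headers=headers
).json()
workflow_id = run["workflow_id"]

# List the last few scheduled runs of that workflow on `main`.
url = (
    f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs"
    "?branch=main&event=schedule&exclude_pull_requests=true&per_page=7"
)
scheduled_runs = requests.get(url, headers=headers).json()["workflow_runs"]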
@@ -21,7 +37,6 @@ def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs" # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results url += f"?branch=main&event=schedule&exclude_pull_requests=true&per_page={num_runs}" @@ -31,7 +46,7 @@ def get_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID, num_runs=7): return result["workflow_runs"] -def get_last_daily_ci_runs(token, workflow_id = DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID): """Get the last completed workflow run id of the scheduled (daily) CI.""" workflow_runs = get_daily_ci_runs(token, workflow_id) workflow_run_id = None @@ -55,7 +70,7 @@ def get_last_daily_ci_run_commit(token): return head_sha -def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): """Get the artifacts of last completed workflow run id of the scheduled (daily) CI.""" workflow_run_id = get_last_daily_ci_runs(token, workflow_id) if workflow_run_id is not None: @@ -68,7 +83,7 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id = ) -def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id = DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI.""" get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id) diff --git a/utils/notification_service.py b/utils/notification_service.py index d361ffa725ee55..d501a24eb02899 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -26,7 +26,7 @@ import requests from get_ci_error_statistics import get_jobs -from get_previous_daily_ci import get_last_daily_ci_reports +from get_previous_daily_ci import get_last_daily_ci_reports, get_workflow_id from huggingface_hub import HfApi from slack_sdk import WebClient @@ -1275,8 +1275,14 @@ def prepare_reports(title, header, reports, to_truncate=True): artifact_names = [f"ci_results_{job_name}"] output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) + workflow_id = None + token = os.environ["ACCESS_REPO_INFO_TOKEN"] + workflow_id = get_workflow_id(token, os.environ["GITHUB_RUN_ID"]) prev_ci_artifacts = get_last_daily_ci_reports( - artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"], workflow_id=WORKFLOW_ID + artifact_names=artifact_names, + output_dir=output_dir, + token=token, + workflow_id=workflow_id, ) message = Message( From 19c73cb0b1a7078c3d0d1b77bed405a1c5b55997 Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Wed, 15 Jan 2025 13:02:21 +0100 Subject: [PATCH 09/10] Remove redundant variable --- utils/notification_service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/notification_service.py b/utils/notification_service.py index d501a24eb02899..301c77d7b201fb 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -1275,7 +1275,6 @@ def prepare_reports(title, header, reports, to_truncate=True): 
artifact_names = [f"ci_results_{job_name}"] output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) - workflow_id = None token = os.environ["ACCESS_REPO_INFO_TOKEN"] workflow_id = get_workflow_id(token, os.environ["GITHUB_RUN_ID"]) prev_ci_artifacts = get_last_daily_ci_reports( From 4afffcf9a6196430b8a129d6be8987d7f7f71a6d Mon Sep 17 00:00:00 2001 From: Ivar Flakstad <69173633+ivarflakstad@users.noreply.github.com> Date: Fri, 17 Jan 2025 20:46:17 +0100 Subject: [PATCH 10/10] Revert some changes that were deemed no longer required --- .github/workflows/slack-report.yml | 9 ------- utils/get_previous_daily_ci.py | 43 +++++++++--------------------- utils/notification_service.py | 32 +++++++--------------- 3 files changed, 22 insertions(+), 62 deletions(-) diff --git a/.github/workflows/slack-report.yml b/.github/workflows/slack-report.yml index eb3294f392d94f..cbea37ff567a96 100644 --- a/.github/workflows/slack-report.yml +++ b/.github/workflows/slack-report.yml @@ -21,13 +21,6 @@ on: ci_event: required: true type: string - report_repo_id: - required: false - type: string - upload_report_summary: - required: false - type: boolean - default: false env: TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }} @@ -54,8 +47,6 @@ jobs: CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }} CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }} SLACK_REPORT_CHANNEL: ${{ inputs.slack_report_channel }} - REPORT_REPO_ID: ${{ inputs.report_repo_id }} - UPLOAD_REPORT_SUMMARY: ${{ inputs.upload_report_summary }} ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }} CI_EVENT: ${{ inputs.ci_event }} CI_SHA: ${{ github.sha }} diff --git a/utils/get_previous_daily_ci.py b/utils/get_previous_daily_ci.py index c46a924ea1c138..efd7d24a752991 100644 --- a/utils/get_previous_daily_ci.py +++ b/utils/get_previous_daily_ci.py @@ -5,30 +5,7 @@ from get_ci_error_statistics import download_artifact, get_artifacts_links -# This is the id of a workflow (not of a workflow run). -# From a given workflow run (where we have workflow run id), we can get the workflow id by going to -# https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} -# and check the `workflow_id` key. -DEFAULT_WORKFLOW_ID = "90575235" - - -def get_workflow_id(token, run_id): - """Get the workflow id of the provided run""" - - if run_id is None: - return DEFAULT_WORKFLOW_ID - - headers = None - if token is not None: - headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} - - url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{run_id}" - result = requests.get(url, headers=headers).json() - - return result["workflow_id"] - - -def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): +def get_daily_ci_runs(token, num_runs=7): """Get the workflow runs of the scheduled (daily) CI. This only selects the runs triggered by the `schedule` event on the `main` branch. @@ -37,6 +14,12 @@ def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): if token is not None: headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} + # The id of a workflow (not of a workflow run). + # From a given workflow run (where we have workflow run id), we can get the workflow id by going to + # https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id} + # and check the `workflow_id` key. 
+ workflow_id = "90575235" + url = f"https://api.github.com/repos/huggingface/transformers/actions/workflows/{workflow_id}/runs" # On `main` branch + event being `schedule` + not returning PRs + only `num_runs` results url += f"?branch=main&event=schedule&exclude_pull_requests=true&per_page={num_runs}" @@ -46,9 +29,9 @@ def get_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID, num_runs=7): return result["workflow_runs"] -def get_last_daily_ci_runs(token, workflow_id=DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_runs(token): """Get the last completed workflow run id of the scheduled (daily) CI.""" - workflow_runs = get_daily_ci_runs(token, workflow_id) + workflow_runs = get_daily_ci_runs(token) workflow_run_id = None for workflow_run in workflow_runs: if workflow_run["status"] == "completed": @@ -70,9 +53,9 @@ def get_last_daily_ci_run_commit(token): return head_sha -def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_artifacts(artifact_names, output_dir, token): """Get the artifacts of last completed workflow run id of the scheduled (daily) CI.""" - workflow_run_id = get_last_daily_ci_runs(token, workflow_id) + workflow_run_id = get_last_daily_ci_runs(token) if workflow_run_id is not None: artifacts_links = get_artifacts_links(worflow_run_id=workflow_run_id, token=token) for artifact_name in artifact_names: @@ -83,9 +66,9 @@ def get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id=D ) -def get_last_daily_ci_reports(artifact_names, output_dir, token, workflow_id=DEFAULT_WORKFLOW_ID): +def get_last_daily_ci_reports(artifact_names, output_dir, token): """Get the artifacts' content of the last completed workflow run id of the scheduled (daily) CI.""" - get_last_daily_ci_artifacts(artifact_names, output_dir, token, workflow_id) + get_last_daily_ci_artifacts(artifact_names, output_dir, token) results = {} for artifact_name in artifact_names: diff --git a/utils/notification_service.py b/utils/notification_service.py index 301c77d7b201fb..6c9eab3a85387b 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -26,7 +26,7 @@ import requests from get_ci_error_statistics import get_jobs -from get_previous_daily_ci import get_last_daily_ci_reports, get_workflow_id +from get_previous_daily_ci import get_last_daily_ci_reports from huggingface_hub import HfApi from slack_sdk import WebClient @@ -108,13 +108,11 @@ def __init__( ci_title: str, model_results: Dict, additional_results: Dict, - repo_id: str = "hf-internal-testing/transformers_daily_ci", - selected_warnings: Union[List, None] = None, + selected_warnings: List = None, prev_ci_artifacts=None, ): self.title = title self.ci_title = ci_title - self.repo_id = repo_id # Failures and success of the modeling tests self.n_model_success = sum(r["success"] for r in model_results.values()) @@ -535,11 +533,11 @@ def payload(self) -> str: commit_info = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt", - repo_id=self.repo_id, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) - url = f"https://huggingface.co/datasets/{self.repo_id}/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" + url = 
f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/{commit_info.oid}/{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.txt" # extra processing to save to json format new_failed_tests = {} @@ -562,7 +560,7 @@ def payload(self) -> str: _ = api.upload_file( path_or_fileobj=file_path, path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/new_model_failures.json", - repo_id=self.repo_id, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -922,11 +920,6 @@ def prepare_reports(title, header, reports, to_truncate=True): if __name__ == "__main__": SLACK_REPORT_CHANNEL_ID = os.environ["SLACK_REPORT_CHANNEL"] - REPORT_REPO_ID = os.environ.get("REPORT_REPO_ID") - if not REPORT_REPO_ID: - REPORT_REPO_ID = "hf-internal-testing/transformers_daily_ci" - UPLOAD_REPORT_SUMMARY = os.environ.get("UPLOAD_REPORT_SUMMARY") == "true" - WORKFLOW_ID = "90575235" # runner_status = os.environ.get("RUNNER_STATUS") # runner_env_status = os.environ.get("RUNNER_ENV_STATUS") @@ -1227,8 +1220,7 @@ def prepare_reports(title, header, reports, to_truncate=True): os.makedirs(os.path.join(os.getcwd(), f"ci_results_{job_name}")) target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-caller.yml@refs/heads/main" - amd_target_workflow = "huggingface/transformers/.github/workflows/self-scheduled-amd-caller.yml@refs/heads/main" - is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") in [target_workflow, amd_target_workflow] + is_scheduled_ci_run = os.environ.get("CI_WORKFLOW_REF") == target_workflow # Only the model testing job is concerned: this condition is to avoid other jobs to upload the empty list as # results. 
@@ -1241,7 +1233,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/model_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/model_results.json", - repo_id=REPORT_REPO_ID, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1263,7 +1255,7 @@ def prepare_reports(title, header, reports, to_truncate=True): api.upload_file( path_or_fileobj=f"ci_results_{job_name}/{test_to_result_name[job]}_results.json", path_in_repo=f"{datetime.datetime.today().strftime('%Y-%m-%d')}/ci_results_{job_name}/{test_to_result_name[job]}_results.json", - repo_id=REPORT_REPO_ID, + repo_id="hf-internal-testing/transformers_daily_ci", repo_type="dataset", token=os.environ.get("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN", None), ) @@ -1275,13 +1267,8 @@ def prepare_reports(title, header, reports, to_truncate=True): artifact_names = [f"ci_results_{job_name}"] output_dir = os.path.join(os.getcwd(), "previous_reports") os.makedirs(output_dir, exist_ok=True) - token = os.environ["ACCESS_REPO_INFO_TOKEN"] - workflow_id = get_workflow_id(token, os.environ["GITHUB_RUN_ID"]) prev_ci_artifacts = get_last_daily_ci_reports( - artifact_names=artifact_names, - output_dir=output_dir, - token=token, - workflow_id=workflow_id, + artifact_names=artifact_names, output_dir=output_dir, token=os.environ["ACCESS_REPO_INFO_TOKEN"] ) message = Message( @@ -1289,7 +1276,6 @@ def prepare_reports(title, header, reports, to_truncate=True): ci_title, model_results, additional_results, - repo_id=REPORT_REPO_ID, selected_warnings=selected_warnings, prev_ci_artifacts=prev_ci_artifacts, )
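One reasoning step behind patch 06 in this series ("handle empty string REPORT_REPO_ID correctly") is worth spelling out: os.environ.get(name, default) only falls back when the variable is unset, whereas an optional workflow input left blank typically reaches the script as an empty string, so an explicit falsy check is needed. A minimal illustration, assuming nothing beyond the standard library:

import os

os.environ["REPORT_REPO_ID"] = ""  # what an unspecified optional input typically exports

with_default = os.environ.get("REPORT_REPO_ID", "hf-internal-testing/transformers_daily_ci")
assert with_default == ""  # the default is ignored: the variable is set, just empty

repo_id = os.environ.get("REPORT_REPO_ID")
if not repo_id:
    repo_id = "hf-internal-testing/transformers_daily_ci"
assert repo_id == "hf-internal-testing/transformers_daily_ci"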