From 95346e9dcd2724ba8203c61759907fb3a8b737cb Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:58:17 +0100 Subject: [PATCH] Add artifact name in job step to maintain job / artifact correspondence (#28682) * avoid using job name * apply to other files --------- Co-authored-by: ydshieh --- .github/workflows/self-nightly-scheduled.yml | 6 +-- .github/workflows/self-past.yml | 6 +-- .github/workflows/self-push-amd.yml | 2 +- .github/workflows/self-push.yml | 8 ++-- .github/workflows/self-scheduled-amd.yml | 10 ++--- .github/workflows/self-scheduled.yml | 12 +++--- utils/get_ci_error_statistics.py | 26 +++++++++++ utils/notification_service.py | 45 +++++++------------- 8 files changed, 64 insertions(+), 51 deletions(-) diff --git a/.github/workflows/self-nightly-scheduled.yml b/.github/workflows/self-nightly-scheduled.yml index b87951fd8d912d..5c3e30e4b424f9 100644 --- a/.github/workflows/self-nightly-scheduled.yml +++ b/.github/workflows/self-nightly-scheduled.yml @@ -115,7 +115,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -176,7 +176,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -238,7 +238,7 @@ jobs: continue-on-error: true run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly" if: ${{ always() }} uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/self-past.yml b/.github/workflows/self-past.yml index c2a6c652e458ba..6b7587fdeb8227 100644 --- a/.github/workflows/self-past.yml +++ b/.github/workflows/self-past.yml @@ -141,7 +141,7 @@ jobs: echo "$job_name" echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -221,7 +221,7 @@ jobs: echo "$job_name" echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -293,7 +293,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}" if: ${{ always() }} uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/self-push-amd.yml b/.github/workflows/self-push-amd.yml index 2630ea72ef7529..4bd7c1f4873dab 100644 --- a/.github/workflows/self-push-amd.yml +++ b/.github/workflows/self-push-amd.yml @@ -237,7 +237,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/self-push.yml b/.github/workflows/self-push.yml index e6f1f3b3050f7a..fd823ce4f5cac8 100644 --- a/.github/workflows/self-push.yml +++ b/.github/workflows/self-push.yml @@ -207,7 +207,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -302,7 +302,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -392,7 +392,7 @@ jobs: continue-on-error: true run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -482,7 +482,7 @@ jobs: continue-on-error: true run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/self-scheduled-amd.yml b/.github/workflows/self-scheduled-amd.yml index 57d2f0339bd056..69f5f861a3ffcd 100644 --- a/.github/workflows/self-scheduled-amd.yml +++ b/.github/workflows/self-scheduled-amd.yml @@ -169,7 +169,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -237,7 +237,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -294,7 +294,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -350,7 +350,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -407,7 +407,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 80bd63357c81f8..d4b84983cdae74 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -121,7 +121,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -182,7 +182,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -233,7 +233,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -283,7 +283,7 @@ jobs: continue-on-error: true run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -334,7 +334,7 @@ jobs: run: | cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu" if: ${{ always() }} uses: actions/upload-artifact@v3 with: @@ -394,7 +394,7 @@ jobs: continue-on-error: true run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt - - name: Test suite reports artifacts + - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports" if: ${{ always() }} uses: actions/upload-artifact@v3 with: diff --git a/utils/get_ci_error_statistics.py b/utils/get_ci_error_statistics.py index 93884dda1df630..eb8ffa37b80335 100644 --- a/utils/get_ci_error_statistics.py +++ b/utils/get_ci_error_statistics.py @@ -10,6 +10,32 @@ import requests +def get_jobs(workflow_run_id, token=None): + """Extract jobs in a GitHub Actions workflow run""" + + headers = None + if token is not None: + headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"} + + url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100" + result = requests.get(url, headers=headers).json() + jobs = [] + + try: + jobs.extend(result["jobs"]) + pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100) + + for i in range(pages_to_iterate_over): + result = requests.get(url + f"&page={i + 2}", headers=headers).json() + jobs.extend(result["jobs"]) + + return jobs + except Exception: + print(f"Unknown error, could not fetch links:\n{traceback.format_exc()}") + + return [] + + def get_job_links(workflow_run_id, token=None): """Extract job names and their job links in a GitHub Actions workflow run""" diff --git a/utils/notification_service.py b/utils/notification_service.py index 969107b3f88481..27adf054f25f7e 100644 --- a/utils/notification_service.py +++ b/utils/notification_service.py @@ -24,7 +24,7 @@ from typing import Dict, List, Optional, Union import requests -from get_ci_error_statistics import get_job_links +from get_ci_error_statistics import get_jobs from get_previous_daily_ci import get_last_daily_ci_reports from slack_sdk import WebClient @@ -938,9 +938,19 @@ def prepare_reports(title, header, reports, to_truncate=True): Message.error_out(title, ci_title) raise ValueError("Errored out.") - github_actions_job_links = get_job_links( + github_actions_jobs = get_jobs( workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"] ) + github_actions_job_links = {job["name"]: job["html_url"] for job in github_actions_jobs} + + artifact_name_to_job_map = {} + for job in github_actions_jobs: + for step in job["steps"]: + if step["name"].startswith("Test suite reports artifacts: "): + artifact_name = step["name"][len("Test suite reports artifacts: ") :] + artifact_name_to_job_map[artifact_name] = job + break + available_artifacts = retrieve_available_artifacts() modeling_categories = [ @@ -974,32 +984,13 @@ def prepare_reports(title, header, reports, to_truncate=True): unclassified_model_failures = [] - # This prefix is used to get job links below. For past CI, we use `workflow_call`, which changes the job names from - # `Model tests (...)` to `PyTorch 1.5 / Model tests (...)` for example. - job_name_prefix = "" - if ci_event.startswith("Past CI - "): - framework, version = ci_event.replace("Past CI - ", "").split("-") - framework = "PyTorch" if framework == "pytorch" else "TensorFlow" - job_name_prefix = f"{framework} {version}" - elif ci_event.startswith("Nightly CI"): - job_name_prefix = "Nightly CI" - elif ci_event.startswith("Push CI (AMD) - "): - flavor = ci_event.replace("Push CI (AMD) - ", "") - job_name_prefix = f"AMD {flavor}" - elif ci_event.startswith("Scheduled CI (AMD) - "): - flavor = ci_event.replace("Scheduled CI (AMD) - ", "") - job_name_prefix = f"AMD {flavor}" - for model in model_results.keys(): for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths: artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) if "stats" in artifact: # Link to the GitHub Action job - # The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert` - job_name = f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)" - if job_name_prefix: - job_name = f"{job_name_prefix} / {job_name}" - model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name) + job = artifact_name_to_job_map[artifact_path["path"]] + model_results[model]["job_link"][artifact_path["gpu"]] = job["html_url"] failed, success, time_spent = handle_test_results(artifact["stats"]) model_results[model]["success"] += success model_results[model]["time_spent"] += time_spent[1:-1] + ", " @@ -1084,12 +1075,8 @@ def prepare_reports(title, header, reports, to_truncate=True): for artifact_path in available_artifacts[additional_files[key]].paths: # Link to the GitHub Action job - job_name = key - if artifact_path["gpu"] is not None: - job_name = f"{key} ({artifact_path['gpu']}-gpu)" - if job_name_prefix: - job_name = f"{job_name_prefix} / {job_name}" - additional_results[key]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name) + job = artifact_name_to_job_map[artifact_path["path"]] + additional_results[key]["job_link"][artifact_path["gpu"]] = job["html_url"] artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"]) stacktraces = handle_stacktraces(artifact["failures_line"])