Skip to content

Commit

Permalink
Add artifact name in job step to maintain job / artifact corresponden…
Browse files Browse the repository at this point in the history
…ce (huggingface#28682)

* avoid using job name

* apply to other files

---------

Co-authored-by: ydshieh <[email protected]>
  • Loading branch information
ydshieh and ydshieh authored Jan 31, 2024
1 parent beb2a09 commit 95346e9
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 51 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/self-nightly-scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -176,7 +176,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -238,7 +238,7 @@ jobs:
continue-on-error: true
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_nightly"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/self-past.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ jobs:
echo "$job_name"
echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt
- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -221,7 +221,7 @@ jobs:
echo "$job_name"
echo "$job_name" > /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/job_name.txt
- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -293,7 +293,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports_postfix_${{ inputs.framework }}-${{ inputs.version }}"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/self-push-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/self-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -302,7 +302,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -392,7 +392,7 @@ jobs:
continue-on-error: true
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -482,7 +482,7 @@ jobs:
continue-on-error: true
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/self-scheduled-amd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -237,7 +237,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -294,7 +294,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -350,7 +350,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -407,7 +407,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_deepspeed_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_deepspeed_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/self-scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -182,7 +182,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_gpu_${{ matrix.folders }}/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_all_tests_gpu_${{ env.matrix_folders }}_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -233,7 +233,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_examples_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_examples_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -283,7 +283,7 @@ jobs:
continue-on-error: true
run: cat /transformers/reports/${{ matrix.machine_type }}_tests_torch_pipeline_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -334,7 +334,7 @@ jobs:
run: |
cat /transformers/reports/${{ matrix.machine_type }}_tests_tf_pipeline_gpu/failures_short.txt
- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_tf_pipeline_gpu"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down Expand Up @@ -394,7 +394,7 @@ jobs:
continue-on-error: true
run: cat /workspace/transformers/reports/${{ matrix.machine_type }}_tests_torch_cuda_extensions_gpu/failures_short.txt

- name: Test suite reports artifacts
- name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_tests_torch_cuda_extensions_gpu_test_reports"
if: ${{ always() }}
uses: actions/upload-artifact@v3
with:
Expand Down
26 changes: 26 additions & 0 deletions utils/get_ci_error_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,32 @@
import requests


def get_jobs(workflow_run_id, token=None):
"""Extract jobs in a GitHub Actions workflow run"""

headers = None
if token is not None:
headers = {"Accept": "application/vnd.github+json", "Authorization": f"Bearer {token}"}

url = f"https://api.github.com/repos/huggingface/transformers/actions/runs/{workflow_run_id}/jobs?per_page=100"
result = requests.get(url, headers=headers).json()
jobs = []

try:
jobs.extend(result["jobs"])
pages_to_iterate_over = math.ceil((result["total_count"] - 100) / 100)

for i in range(pages_to_iterate_over):
result = requests.get(url + f"&page={i + 2}", headers=headers).json()
jobs.extend(result["jobs"])

return jobs
except Exception:
print(f"Unknown error, could not fetch links:\n{traceback.format_exc()}")

return []


def get_job_links(workflow_run_id, token=None):
"""Extract job names and their job links in a GitHub Actions workflow run"""

Expand Down
45 changes: 16 additions & 29 deletions utils/notification_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from typing import Dict, List, Optional, Union

import requests
from get_ci_error_statistics import get_job_links
from get_ci_error_statistics import get_jobs
from get_previous_daily_ci import get_last_daily_ci_reports
from slack_sdk import WebClient

Expand Down Expand Up @@ -938,9 +938,19 @@ def prepare_reports(title, header, reports, to_truncate=True):
Message.error_out(title, ci_title)
raise ValueError("Errored out.")

github_actions_job_links = get_job_links(
github_actions_jobs = get_jobs(
workflow_run_id=os.environ["GITHUB_RUN_ID"], token=os.environ["ACCESS_REPO_INFO_TOKEN"]
)
github_actions_job_links = {job["name"]: job["html_url"] for job in github_actions_jobs}

artifact_name_to_job_map = {}
for job in github_actions_jobs:
for step in job["steps"]:
if step["name"].startswith("Test suite reports artifacts: "):
artifact_name = step["name"][len("Test suite reports artifacts: ") :]
artifact_name_to_job_map[artifact_name] = job
break

available_artifacts = retrieve_available_artifacts()

modeling_categories = [
Expand Down Expand Up @@ -974,32 +984,13 @@ def prepare_reports(title, header, reports, to_truncate=True):

unclassified_model_failures = []

# This prefix is used to get job links below. For past CI, we use `workflow_call`, which changes the job names from
# `Model tests (...)` to `PyTorch 1.5 / Model tests (...)` for example.
job_name_prefix = ""
if ci_event.startswith("Past CI - "):
framework, version = ci_event.replace("Past CI - ", "").split("-")
framework = "PyTorch" if framework == "pytorch" else "TensorFlow"
job_name_prefix = f"{framework} {version}"
elif ci_event.startswith("Nightly CI"):
job_name_prefix = "Nightly CI"
elif ci_event.startswith("Push CI (AMD) - "):
flavor = ci_event.replace("Push CI (AMD) - ", "")
job_name_prefix = f"AMD {flavor}"
elif ci_event.startswith("Scheduled CI (AMD) - "):
flavor = ci_event.replace("Scheduled CI (AMD) - ", "")
job_name_prefix = f"AMD {flavor}"

for model in model_results.keys():
for artifact_path in available_artifacts[f"run_all_tests_gpu_{model}_test_reports"].paths:
artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
if "stats" in artifact:
# Link to the GitHub Action job
# The job names use `matrix.folder` which contain things like `models/bert` instead of `models_bert`
job_name = f"Model tests ({model.replace('models_', 'models/')}, {artifact_path['gpu']}-gpu)"
if job_name_prefix:
job_name = f"{job_name_prefix} / {job_name}"
model_results[model]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name)
job = artifact_name_to_job_map[artifact_path["path"]]
model_results[model]["job_link"][artifact_path["gpu"]] = job["html_url"]
failed, success, time_spent = handle_test_results(artifact["stats"])
model_results[model]["success"] += success
model_results[model]["time_spent"] += time_spent[1:-1] + ", "
Expand Down Expand Up @@ -1084,12 +1075,8 @@ def prepare_reports(title, header, reports, to_truncate=True):

for artifact_path in available_artifacts[additional_files[key]].paths:
# Link to the GitHub Action job
job_name = key
if artifact_path["gpu"] is not None:
job_name = f"{key} ({artifact_path['gpu']}-gpu)"
if job_name_prefix:
job_name = f"{job_name_prefix} / {job_name}"
additional_results[key]["job_link"][artifact_path["gpu"]] = github_actions_job_links.get(job_name)
job = artifact_name_to_job_map[artifact_path["path"]]
additional_results[key]["job_link"][artifact_path["gpu"]] = job["html_url"]

artifact = retrieve_artifact(artifact_path["path"], artifact_path["gpu"])
stacktraces = handle_stacktraces(artifact["failures_line"])
Expand Down

0 comments on commit 95346e9

Please sign in to comment.