# Source: copied from the GitHub Actions web UI page for the workflow
# "Run Scheduled Events Docker last" (run #133).

# Display name of the workflow.
name: Run Scheduled Events Docker last

# Token permissions requested for every job in this workflow.
# NOTE(review): all four scopes are set to "write"; confirm each one is
# actually needed and narrow to "read" where it is not.
permissions:
  actions: write
  contents: write
  issues: write
  pull-requests: write

# Triggers: manual dispatch, plus a cron schedule that fires daily at 00:00
# (GitHub evaluates scheduled workflows in UTC).
on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * *'
jobs:
  # Single job: runs the Fork-Maintenance-System action inside a
  # rocm/pytorch container on a self-hosted runner.
  run-scheduled-events:
    runs-on: self-hosted
    container:
      # NOTE(review): ":latest" is a floating tag, so two runs may use
      # different images; consider pinning a specific tag or digest.
      image: rocm/pytorch:latest
      env:
        NODE_ENV: development
      ports:
        - 80
      volumes:
        - my_docker_volume:/volume_mount
      # Extra `docker create` flags: expose /dev/kfd and /dev/dri to the
      # container, add the "video" group, share the host IPC namespace and
      # size /dev/shm at 16G.
      options: --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 16G
    steps:
      - name: Fork Maintenance System
        # NOTE(review): "@docker" is not a commit SHA (presumably a branch),
        # so the action code can change between runs; consider pinning to a
        # full commit SHA.
        uses: Cemberk/Fork-Maintenance-System@docker
        with:
          # Token comes from the CRED_TOKEN repository secret.
          github_token: ${{ secrets.CRED_TOKEN }}
          upstream_repo: "https://github.com/huggingface/transformers"
          # JSON payload passed to the action as a literal block scalar
          # (line breaks preserved). It names the upstream/downstream
          # branches and lists three commit SHAs under "commits".
          schedule_json: |
            {
              "upstream_main_branch": "main",
              "upstream_release_branch": "v4.43-release",
              "downstream_main_branch": "upstream_sync",
              "downstream_testing_branch": "rocm6.3_testing_rel4.43_testing",
              "downstream_develop_branch": "develop",
              "commits": [
                "731f0308bb0a2796e28b68664f4ed050eab6dbfd",
                "0c1b311bff616a4faf214461584a758fbab14a6f",
                "5b2d4fec5abb8ffe755fe66ce0856fc066c561a6"
              ]
            }
          pr_branch_prefix: "scheduled-merge"
          # Folded scalar: the lines below are joined with single spaces into
          # one shell command line, so every line must stay at the same
          # indentation. The sed call uses "$" as its delimiter and deletes
          # every occurrence of "torchaudio" from the requirements file
          # before it is installed.
          requirements_command: >
            ls;
            pwd;
            sudo sed -i 's$torchaudio$$g' examples/pytorch/_tests_requirements.txt;
            pip install -r examples/pytorch/_tests_requirements.txt;
            pip install --no-cache-dir GPUtil azureml azureml-core tokenizers ninja cerberus sympy sacremoses sacrebleu==1.5.1 sentencepiece scipy scikit-learn "urllib3<2" ;
            pip install huggingface_hub datasets ;
            pip install parameterized;
          # Builds the folder list (each directory under tests/models first,
          # then the remaining directories directly under tests/), runs
          # pytest once per folder, and finally prints every stats.txt found
          # under reports/. "|| true" keeps the loop going when a folder's
          # pytest run exits non-zero.
          unit_test_command: >
            cd tests; folders=($(python3 -c 'import os; tests = os.getcwd(); model_tests = os.listdir(os.path.join(tests, "models")); d1 = sorted(list(filter(os.path.isdir, os.listdir(tests)))); d2 = sorted(list(filter(os.path.isdir, [f"models/{x}" for x in model_tests]))); d1.remove("models"); d = d2 + d1; print("\n".join(d))')); cd ..; for folder in "${folders[@]}"; do pytest tests/${folder} -v -rfEs --make-reports="huggingface_unit_tests_run_models_gpu_${folder}" -p no:faulthandler --continue-on-collection-errors -m "not not_device_test" -p no:cacheprovider || true; done; allstats=($(find reports -name stats.txt)); for stat in "${allstats[@]}"; do echo "$stat"; cat $stat; done
          # NOTE(review): this command only echoes the run_mlm.py invocation;
          # it does not execute it. Confirm that is intended.
          performance_test_command: >
            echo "python examples/pytorch/language-modeling/run_mlm.py --model_name_or_path bert-base-uncased --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --do_train --do_eval --output_dir /tmp/test-mlm --per_device_train_batch_size 8 --per_device_eval_batch_size 8 --max_steps 500"