# GH Task Runner (Error Suite) #5
# NOTE: This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Display name of this workflow.
name: GH Task Runner (Error Suite)
# Manual trigger only. The run fans out into one job per matrix task, so it is
# gated behind the explicit `approval_notice` input below.
on:
  workflow_dispatch:
    inputs:
      # Safety gate: the gh-task-runner job is skipped unless this is 'APPROVED'
      approval_notice:
        description: 'WARNING: This will spin up a large number of tasks - get approval from admin before running'
        required: false
        default: 'NOT_APPROVED'
        type: choice
        options:
          - NOT_APPROVED
          - APPROVED
      # Hugging Face repository of the model, forwarded to the runner action
      model_hf_repo:
        description: 'Model Hugging Face Repository'
        required: true
        default: 'RWKV/rwkv-5-world-1b5'
      # Comma separated key=value model arguments, forwarded as-is
      model_args:
        description: 'Model Arguments (ie: dtype="float16")'
        required: false
        default: 'dtype=bfloat16,trust_remote_code=True'
      batch_size:
        description: 'Batch Size'
        required: true
        default: 'auto'
      # Used as the first `runs-on` label of the gh-task-runner job
      backend:
        description: 'Backend to use'
        required: true
        default: 'nvidia-gpu'
        type: choice
        options:
          - nvidia-gpu
          - intel-gpu
          - amd-gpu
          - any-gpu
      # Used to build the `gpu-vram-<N>` runner label.
      # Options are quoted so they are strings, matching the string default.
      gpu_vram:
        description: 'Minimum GPU VRAM (ignored for MPS)'
        required: true
        default: '24'
        type: choice
        options:
          - '16'
          - '24'
          - '40'
          - '48'
          - '80'
      # Quoted: inputs without an explicit type are strings
      num_fewshot:
        description: 'num_fewshot setting (ignored if < 0)'
        required: true
        default: '-1'
      # Controls whether the upload_output job runs after the task matrix
      upload_output:
        description: 'Upload to HF / B2'
        required: false
        default: true
        type: boolean
# Workflow-level environment, visible to every job and step below.
env:
  # Get the final task
  # NOTE(review): neither `custom_task` nor `run_task` is declared as a
  # workflow_dispatch input in the visible part of this workflow, so this
  # presumably resolves to an empty string; the jobs take the task from
  # `matrix.run_task` instead. Confirm whether this variable is still needed.
  RUN_TASK: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}

  # HF repo to sync to
  HF_REPO_SYNC: rwkv-x-dev/lm-eval-output

  # Model HF repo
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}

  # HF / B2 sync settings (credentials come from repository secrets / variables)
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
  B2_APPLICATION_KEY_ID: ${{ secrets.B2_APPLICATION_KEY_ID }}
  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
  B2_PATH_LM_EVAL_OUTPUT: ${{ vars.B2_PATH_LM_EVAL_OUTPUT }}
jobs:
  # Runs one job per entry of the `run_task` matrix on a GPU runner selected
  # by the `backend` and `gpu_vram` inputs.
  gh-task-runner:

    # Check for approval notice
    if: ${{ github.event.inputs.approval_notice == 'APPROVED' }}

    # Strategy Matrix
    strategy:
      # Disable fail-fast behavior, so one failing task does not cancel the rest
      fail-fast: false
      matrix:
        # NOTE: There is a matrix limit of 256 on github
        run_task:
          ### Known Problematic tasks (various error reasons - file missing, temp=0, etc)
          ### ----------------------------------------------------------------------------

          ## ----
          ## Need to double check (removed before i started organizing by failure reasons)
          ## ----
          - bbh
          - bbh_cot_zeroshot
          # - bbh_cot_zeroshot_*
          - bbh_fewshot
          # - bbh_fewshot_*
          - bbh_zeroshot
          # - bbh_zeroshot_*
          - bigbench_*

          ## ----
          ## temp=0 issues
          ## ----
          - anagrams*
          - babi
          - code2text_*
          - codexglue_code2text
          - coqa
          - cycle_letters
          - drop
          - random_insertion
          - unscramble
          - super-glue-*
          - squadv2
          - scrolls
          - reversed_words
          - qasper
          # - qasper_*

          ## ----
          ## Does not exist / file 404s / broken links
          ## ----
          - csatqa
          - csatqa_*
          - belebele
          # - belebele_*
          - generate_until
          - polemo2
          # - polemo2_*
          - pile
          # - pile_*

          ## ----
          ## Weird package dependencies
          ## ----
          #### ifeval requires `pip install langdetect immutabledict nltk` (not documented)
          # However even after all that, it gives a "missing index error"
          - ifeval
          #### minerva_math: antlr4 (not on pip?)
          - minerva_math
          # - minerva_math_*
          #### realtoxicityprompts: requires PERSPECTIVE_API_KEY
          - realtoxicityprompts

          ## ----
          ## Requires hugging face login
          ## ----
          - toxigen

          ## ----
          ## Weird errors (need to reinvestigate)
          ## ----
          - gpt3_translation_benchmarks
          - headqa
          # - headqa_en
          # - headqa_es
          - iwslt2017
          # - iwslt2017-*
          - wmt-ro-en-t5-prompt
          - wmt-t5-prompt
          - t0_eval
          - storycloze
          # - storycloze_*
          - self_consistency

    # Name of the job
    name: "[${{ matrix.run_task }}] ${{ github.event.inputs.model_hf_repo }} - ${{ github.event.inputs.model_args }}"

    # Due to the github worker hard limitation of 24 hours,
    # we apply a timeout of 23 hours (1380 minutes) instead.
    timeout-minutes: 1380

    # Select the type of runner that the job will run on
    # (a runner must carry every label listed here)
    runs-on:
      - ${{ github.event.inputs.backend }}
      - gpu-vram-${{ github.event.inputs.gpu_vram }}
      # - gpu-count-8

    # Actual task setup, and run steps
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Run the task
        uses: ./.github/actions/gh-task-runner-composite
        with:
          run_task: ${{ matrix.run_task }}
          num_fewshot: ${{ github.event.inputs.num_fewshot }}
          model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
          model_args: ${{ github.event.inputs.model_args }}
          batch_size: ${{ github.event.inputs.batch_size }}
          # upload_output: False

  # Runs once after the task matrix, when the `upload_output` input is enabled.
  upload_output:
    name: "Upload to HF / B2"
    needs: gh-task-runner
    runs-on: ubuntu-latest
    # `github.event.inputs.*` values are strings, so the boolean input must be
    # compared explicitly: a bare 'false' string is truthy in an `if:`.
    # NOTE(review): with the default status check this job is skipped when any
    # matrix leg of gh-task-runner fails - confirm whether `!cancelled()` is
    # wanted for this error suite.
    if: ${{ github.event.inputs.upload_output == 'true' }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Run the task
        uses: ./.github/actions/gh-upload-output
        with:
          num_fewshot: ${{ github.event.inputs.num_fewshot }}
          model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
          model_args: ${{ github.event.inputs.model_args }}
          batch_size: ${{ github.event.inputs.batch_size }}
          backend: ${{ github.event.inputs.backend }}