# .github/workflows/gh-task-runner-Large-Suite.yml

name: GH Task Runner (Large Suite)

# Manually triggered only. Every input below appears in the "Run workflow"
# form and is read by the job through `github.event.inputs.*`.
on:
  workflow_dispatch:
    inputs:
      # Safety interlock: the job's `if:` condition only lets the matrix run
      # when this is explicitly switched to APPROVED.
      approval_notice:
        description: 'WARNING: This will spin up a large number of tasks - get approval from admin before running'
        required: false
        default: 'NOT_APPROVED'
        type: choice
        options:
          - NOT_APPROVED
          - APPROVED
      # Model to evaluate; also used in the job name when no rwkv5_file_url
      # is supplied.
      model_hf_repo:
        description: 'Model Hugging Face Repository'
        required: true
        default: 'RWKV/rwkv-5-world-1b5'
      # Forwarded unchanged to the composite action as `model_args`.
      model_args:
        description: 'Model Arguments (ie: dtype="float16")'
        required: false
        default: 'dtype=bfloat16,trust_remote_code=True'
      # Forwarded unchanged to the composite action as `batch_size`.
      batch_size:
        description: 'Batch Size'
        required: true
        default: 'auto'
      # backend:
      #   description: 'Backend to use'
      #   required: true
      #   default: 'nvidia-gpu'
      #   type: choice
      #   options:
      #     - nvidia-gpu
      #     - intel-gpu
      #     - amd-gpu
      #     - any-gpu
      # Selects the `gpu-vram-<value>` runner label in `runs-on`.
      # Values are quoted so they stay strings, matching the quoted default.
      gpu_vram:
        description: 'Minimum GPU VRAM (ignored for MPS)'
        required: true
        default: '24'
        type: choice
        options:
          - '16'
          - '24'
          # - '40'
          - '48'
          - '80'
      # When non-empty, the job name shows rwkv5_test_name instead of
      # model_hf_repo.
      rwkv5_file_url:
        description: 'Model file URL (for rwkv5 .pth eval)'
        default: ''
        required: false
      rwkv5_test_name:
        description: 'Model dev test name (for test)'
        default: 'TEST_MODEL_FILE'
        required: false
      # Untyped inputs are strings, so the default is written as a string
      # rather than a YAML integer.
      num_fewshot:
        description: 'num_fewshot setting (ignored if < 0)'
        required: true
        default: '-1'
      upload_output:
        description: 'Upload to HF / B2'
        required: false
        default: true
        type: boolean

env:
  # Task selector taken from the dispatch inputs.
  # NOTE(review): no `custom_task` or `run_task` input is declared in this
  # workflow's `workflow_dispatch` inputs, so this looks like it always
  # resolves to an empty value - confirm whether anything still reads RUN_TASK.
  RUN_TASK: ${{ github.event.inputs.custom_task || github.event.inputs.run_task }}

  # Hugging Face repo used as the sync target
  HF_REPO_SYNC: 'rwkv-x-dev/lm-eval-output'

  # Hugging Face repo of the model, as entered in the dispatch form
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}

  # Credentials and destination path for the HF / B2 sync
  # (repository secrets and a repository variable)
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
  B2_APPLICATION_KEY_ID: ${{ secrets.B2_APPLICATION_KEY_ID }}
  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
  B2_PATH_LM_EVAL_OUTPUT: ${{ vars.B2_PATH_LM_EVAL_OUTPUT }}

jobs:
  gh-task-runner:

    # Gate: skip the whole matrix unless the dispatcher picked APPROVED
    if: github.event.inputs.approval_notice == 'APPROVED'

    # One job per entry in `run_task`
    strategy:
      # Let the remaining tasks continue when one of them fails
      fail-fast: false
      matrix:
        # NOTE: GitHub limits a matrix to 256 jobs
        run_task:

          ### Large task collections (disabled: they overlap with the tasks below)
          ### ----------------------------------------------------------------------------
          # - multiple_choice
          # - loglikelihood

          ### Medium-slow tasks (>= 15 mins on 4x3090)
          ### ----------------------------------------------------------------------------
          - glue
          # - gsm8k
          - kmmlu
          - qqp
          - pythia
          # - kmmlu_*

          ### Faster tasks (<= 15 mins on 4x3090)
          ### ----------------------------------------------------------------------------

          - 'arithmetic_*'
          - asdiv

          - anli
          # - anli_*
          - ai2_arc
          # - arc_easy
          # - arc_challenge
          - arithmetic
          - blimp
          # - blimp_*
          - boolq
          - cb
          - ceval-valid
          # - ceval-valid_*
          - cmmlu
          # - cmmlu_*
          - cola
          - copa
          - crows_pairs
          # - crows_pairs_*
          - freebase
          - hellaswag
          - kobest
          # - kobest_*
          - lambada
          - lambada_cloze
          # - lambada_*
          - logieval
          - logiqa
          - logiqa2
          - mathqa
          - mc_taco
          - medmcqa
          - medqa_4options
          - mmlu
          # - mmlu_*
          - mnli
          - mnli_mismatch
          - mrpc
          - multimedqa
          - multirc
          - mutual
          - mutual_plus
          - openbookqa
          - piqa
          - prost
          - pubmedqa
          - qa4mre
          # - qa4mre_*
          - qnli
          - race
          - rte
          - sciq
          - sglue_rte
          - sst2
          - swag
          - sycophancy
          # - sycophancy_on_*
          - truthfulqa
          # - truthfulqa_*
          - webqs
          - wic
          - wikitext
          - winogrande
          - wnli
          - wsc
          - wsc273
          

    # Job display name: "[<task>] <model label> - <model args>".
    # The model label is rwkv5_test_name when rwkv5_file_url is set (and the
    # test name is non-empty); otherwise it is model_hf_repo.
    # The folded scalar joins the three lines below with single spaces.
    name: >-
      [${{ matrix.run_task }}]
      ${{ github.event.inputs.rwkv5_file_url && github.event.inputs.rwkv5_test_name || github.event.inputs.model_hf_repo }}
      - ${{ github.event.inputs.model_args }}

    # The worker has a hard limit of 24 hours, so cap each job
    # at 23 hours (1380 minutes) to stay under it.
    timeout-minutes: 1380

    # Runner labels the job requires: an NVIDIA GPU runner with at least the
    # VRAM chosen in the dispatch form
    runs-on:
      - nvidia-gpu
      - gpu-vram-${{ github.event.inputs.gpu_vram }}
      # - gpu-count-any

    # Task setup and run steps
    steps:
      # The composite action below is referenced by a local path, so the
      # repository has to be checked out first.
      # v4 replaces v3, which targets the deprecated Node 16 runtime.
      # NOTE(review): confirm the self-hosted runners are recent enough for
      # Node 20 based actions.
      - name: Checkout repository
        uses: actions/checkout@v4

      # Run a single matrix task; every dispatch input is forwarded unchanged
      # to the composite action.
      - name: Run the task
        uses: ./.github/actions/gh-task-runner-composite
        with:
          run_task: ${{ matrix.run_task }}
          num_fewshot: ${{ github.event.inputs.num_fewshot }}
          model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
          model_args: ${{ github.event.inputs.model_args }}
          batch_size: ${{ github.event.inputs.batch_size }}
          rwkv5_file_url: ${{ github.event.inputs.rwkv5_file_url }}
          rwkv5_test_name: ${{ github.event.inputs.rwkv5_test_name }}
          upload_output: ${{ github.event.inputs.upload_output }}

  # upload_output:
  #   name: "Upload to HF / B2"
  #   needs: gh-task-runner
  #   runs-on: ubuntu-latest
  #   if: ${{ github.event.inputs.upload_output }}
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v3
  #     - name: Run the task
  #       uses: ./.github/actions/gh-upload-output
  #       with:
  #         num_fewshot: ${{ github.event.inputs.num_fewshot }}
  #         model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
  #         model_args: ${{ github.event.inputs.model_args }}
  #         batch_size: ${{ github.event.inputs.batch_size }}
  #         backend: nvidia-gpu