# .github/workflows/gh-task-runner-Focus-Suite.yml

name: GH Task Runner (RWKV-x-dev focused suite)

# Manually triggered only. Each run fans out into one job per entry of the
# `run_task` matrix, so the form defaults to NOT_APPROVED as a safety catch.
# Inputs without an explicit `type` are plain strings, so every default and
# choice option below is written as a quoted string.
on:
  workflow_dispatch:
    inputs:
      # Safety gate: the job's `if:` only proceeds when this is APPROVED.
      approval_notice:
        description: 'WARNING: This will spin up a large number of tasks - get approval from admin before running'
        required: false
        default: 'NOT_APPROVED'
        type: choice
        options:
          - NOT_APPROVED
          - APPROVED

      # Hugging Face repository of the model to run the tasks against.
      model_hf_repo:
        description: 'Model Hugging Face Repository'
        required: true
        default: 'RWKV/rwkv-5-world-1b5'

      # Comma-separated key=value model arguments, forwarded unchanged.
      model_args:
        description: 'Model Arguments (ie: dtype="float16")'
        required: false
        default: 'dtype=bfloat16,trust_remote_code=True'

      # Batch size, forwarded unchanged (default 'auto').
      batch_size:
        description: 'Batch Size'
        required: true
        default: 'auto'

      # backend:
      #   description: 'Backend to use'
      #   required: true
      #   default: 'nvidia-gpu'
      #   type: choice
      #   options:
      #   - nvidia-gpu
      #   - intel-gpu
      #   - amd-gpu
      #   - any-gpu

      # Used to build the `gpu-vram-<N>` runner label in `runs-on`.
      # Options are quoted so they stay strings, matching the quoted default.
      gpu_vram:
        description: 'Minimum GPU VRAM (ignored for MPS)'
        required: true
        default: '24'
        type: choice
        options:
          - '16'
          - '24'
          # - '40'
          - '48'
          - '80'

      # Optional model file URL; when set, the job name shows
      # `rwkv5_test_name` instead of the HF repo.
      rwkv5_file_url:
        description: 'Model file URL (for rwkv5 .pth eval)'
        required: false
        default: ''

      # Label for the model under test when `rwkv5_file_url` is supplied.
      rwkv5_test_name:
        description: 'Model dev test name (for test)'
        required: false
        default: 'TEST_MODEL_FILE'

      # Few-shot count; per the description, negative values are ignored.
      # Quoted because the input is a string, not a number.
      num_fewshot:
        description: 'num_fewshot setting (ignored if < 0)'
        required: true
        default: '-1'

      # Whether the results should be uploaded to HF / B2.
      upload_output:
        description: 'Upload to HF / B2'
        required: false
        default: true
        type: boolean

env:
  # Hugging Face repo used as the sync target
  HF_REPO_SYNC: rwkv-x-dev/lm-eval-output

  # Hugging Face repo of the model, taken from the dispatch input
  MODEL_HF_REPO: ${{ github.event.inputs.model_hf_repo }}

  # Credentials (repository secrets) and destination path (repository
  # variable) for the HF / B2 sync
  HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
  B2_APPLICATION_KEY_ID: ${{ secrets.B2_APPLICATION_KEY_ID }}
  B2_APPLICATION_KEY: ${{ secrets.B2_APPLICATION_KEY }}
  B2_PATH_LM_EVAL_OUTPUT: ${{ vars.B2_PATH_LM_EVAL_OUTPUT }}

jobs:
  # Runs one job per `run_task` matrix entry, each delegating to the local
  # composite action with the dispatch inputs.
  gh-task-runner:

    # Approval gate: the whole matrix is skipped unless the dispatcher
    # explicitly selected APPROVED.
    if: ${{ github.event.inputs.approval_notice == 'APPROVED' }}

    # Strategy Matrix
    strategy:
      # Keep the remaining tasks running when one of them fails
      fail-fast: false
      matrix:

        # NOTE: There is a matrix limit of 256 on github
        run_task:
          ### Main suite metrics to include
          ### ----------------------------------------------------------------------------
          - anli
          - glue
          - truthfulqa
          - cmmlu
          - lambada
          - pythia
          - mmlu
          - blimp
          - ai2_arc
          # - arc_easy
          # - arc_challenge
          - logiqa
          - winogrande
          - openbookqa
          - hellaswag
          - copa
          - sciq
          - piqa

          ### Slow suite metrics to include
          ### ----------------------------------------------------------------------------
          # - triviaqa
          - record

          ### Various multilang tasks
          ### ----------------------------------------------------------------------------
          - xcopa
          # - xcopa_*
          - xnli
          # - xnli_*
          - xstorycloze
          # - xstorycloze_*
          - xwinograd
          # - xwinograd_*
          - lambada_multilingual
          # - lambada_openai_*
          - pawsx
          # - paws_*

    # Name of the job: "[task] <model> - <model args>".
    # `a && b || c` acts as a ternary here: when a model file URL is supplied
    # the dev test name is shown, otherwise the Hugging Face repo.
    name: "[${{ matrix.run_task }}] ${{ github.event.inputs.rwkv5_file_url && github.event.inputs.rwkv5_test_name || github.event.inputs.model_hf_repo }} - ${{ github.event.inputs.model_args }}"

    # Due to github worker hard limitation, of 24 hours
    # we apply a timeout of 23 hours (23 * 60 = 1380 minutes) instead.
    timeout-minutes: 1380

    # Select the type of runner that the job will run on.
    # A runner must carry every label listed; the VRAM label is built from
    # the `gpu_vram` dispatch input.
    runs-on:
      - nvidia-gpu
      - gpu-vram-${{ github.event.inputs.gpu_vram }}
      # - gpu-count-any

    # Actual task setup, and run steps
    steps:
      # The checkout is required so the local composite action below exists
      # in the workspace. v4 replaces v3, whose Node 16 runtime is deprecated.
      - name: Checkout repository
        uses: actions/checkout@v4

      # Forward the matrix task and the dispatch inputs to the composite action.
      - name: Run the task
        uses: ./.github/actions/gh-task-runner-composite
        with:
          run_task: ${{ matrix.run_task }}
          num_fewshot: ${{ github.event.inputs.num_fewshot }}
          model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
          model_args: ${{ github.event.inputs.model_args }}
          batch_size: ${{ github.event.inputs.batch_size }}
          rwkv5_file_url: ${{ github.event.inputs.rwkv5_file_url }}
          rwkv5_test_name: ${{ github.event.inputs.rwkv5_test_name }}
          upload_output: ${{ github.event.inputs.upload_output }}

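  # Disabled: standalone upload job, kept commented out for reference.
  # NOTE(review): before re-enabling, the `if:` below should compare against
  # the string 'true' - `github.event.inputs.*` values are strings, and the
  # non-empty string 'false' is truthy in an expression.
  # upload_output: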
  #   name: "Upload to HF / B2"
  #   needs: gh-task-runner
  #   runs-on: ubuntu-latest
  #   if: ${{ github.event.inputs.upload_output }}
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v3
  #     - name: Run the task
  #       uses: ./.github/actions/gh-upload-output
  #       with:
  #         num_fewshot: ${{ github.event.inputs.num_fewshot }}
  #         model_hf_repo: ${{ github.event.inputs.model_hf_repo }}
  #         model_args: ${{ github.event.inputs.model_args }}
  #         batch_size: ${{ github.event.inputs.batch_size }}
  #         backend: nvidia-gpu