# Skip to content

# test: CI convert llama3.1 #56

# test: CI convert llama3.1

# test: CI convert llama3.1 #56

# Workflow file for this run

name: Convert model to ONNX

# Triggers:
#   * push to the conversion branch (no inputs are available on this path)
#   * manual dispatch, where the caller supplies the model coordinates
on:
  push:
    # Sequence of patterns matched against refs/heads
    branches:
      - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        # Declared explicitly so all three inputs are typed the same way.
        type: string
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string
# concurrency:
#   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
#   cancel-in-progress: true
# Workflow-wide settings shared by every step.
env:
  # Hugging Face account/organisation that receives the converted model.
  USER_NAME: cortexso
  # Manual runs (workflow_dispatch) provide these through `inputs`. Push-triggered
  # runs have no inputs, so each expression falls back to the value that used to
  # be hard-coded here.
  # NOTE(review): the fallback source is Meta-Llama-3 while the fallback target is
  # llama3.1 - confirm that pairing is intended.
  SOURCE_MODEL_ID: ${{ inputs.source_model_id || 'meta-llama/Meta-Llama-3-8B-Instruct' }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size || '8b' }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id || 'llama3.1' }}
  PRECISION: int4  # Valid values: int4,fp16,fp32
  EXECUTOR: dml  # Valid values: cpu,cuda,dml,web
  ONNXRUNTIME_GENAI_VERSION: "0.3.0"  # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
jobs:
  # Single job: pull a Hugging Face checkpoint onto the runner, convert it with
  # the onnxruntime-genai model builder, and upload the ONNX output back to
  # Hugging Face. Model files live under C:\models\<MODEL_NAME>\{hf,onnx,cache}.
  converter:
    runs-on: windows-onnx
    steps:
      - name: Checkout
        uses: actions/checkout@v4  # v4.1.7
        with:
          # builder.py is invoked from the onnxruntime-genai/ folder below,
          # presumably a submodule of this repository.
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@v5  # v5.1.1
        with:
          python-version: '3.10'
          # architecture: 'x64'

      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9  # v4.0.2
        with:
          # The first entry is pip's cache location on Windows (the runner OS);
          # the remaining entries are kept from the previous configuration.
          path: |
            ~\AppData\Local\pip\Cache
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        shell: powershell
        run: |
          # python.exe -m ensurepip
          # python.exe -m pip install --upgrade pip
          pip3 install -I --user huggingface_hub hf-transfer numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0
          # if ($env:EXECUTOR -eq 'cpu') {
          #   pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'dml') {
          #   pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'cuda') {
          #   pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
          # } else {
          #   Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
          #   exit 1
          # }
          # python -m onnxruntime_genai.models.builder --help

      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          # Keep the part after the last '/' of "<owner>/<repo>" and lower-case it.
          # The id is read from the environment instead of being pasted into the
          # script text, so its content cannot alter the script.
          $MODEL_NAME_LOWER = ($env:SOURCE_MODEL_ID -split '/')[-1].ToLower()
          # Windows PowerShell's `>>` redirection writes UTF-16; GITHUB_ENV must
          # be appended as UTF-8 for later steps to see the variable.
          "MODEL_NAME=$MODEL_NAME_LOWER" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER"  # For debugging

      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: $env:SOURCE_MODEL_ID"
          echo "PRECISION: $env:PRECISION"
          echo "EXECUTOR: $env:EXECUTOR"
          echo "MODEL_NAME: $env:MODEL_NAME"

      # - name: Prepare folders
      #   run: |
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache

      - name: Check file existence
        id: check_files
        uses: andstor/file-existence-action@v1
        with:
          files: 'C:\models\${{ env.MODEL_NAME }}/hf'

      - name: Download Hugging Face model
        id: download_hf
        # Download only when the checkpoint folder is NOT already on the runner.
        if: steps.check_files.outputs.files_exists == 'false'
        env:
          # Passed through the environment so the secret is not written into
          # the generated script file.
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          $modelRoot = Join-Path 'C:\models' $env:MODEL_NAME
          $hfDir = Join-Path $modelRoot 'hf'
          # -Force makes directory creation succeed when the folder already exists.
          foreach ($sub in 'hf', 'onnx', 'cache') {
            New-Item -ItemType Directory -Force -Path (Join-Path $modelRoot $sub) | Out-Null
          }
          huggingface-cli login --token $env:HUGGINGFACE_TOKEN --add-to-git-credential
          # Download into the same folder the conversion step reads from.
          huggingface-cli download --repo-type model --local-dir $hfDir $env:SOURCE_MODEL_ID
          # Only the last native exit code decides the step result, so remember
          # the download status before logout overwrites it.
          $downloadExit = $LASTEXITCODE
          huggingface-cli logout
          if ($downloadExit -ne 0) { exit $downloadExit }

      - name: Rollback Deployment
        # failure() is required: a condition without a status function is
        # implicitly combined with success() and would never run after a failure.
        if: failure() && steps.download_hf.outcome == 'failure'
        run: |
          # Never build a path from an empty model name.
          if (-not $env:MODEL_NAME) { exit 0 }
          $modelRoot = Join-Path 'C:\models' $env:MODEL_NAME
          # Drop the partial download so the next run does not mistake it for a
          # complete checkpoint.
          if (Test-Path -LiteralPath $modelRoot) {
            Remove-Item -Recurse -Force -LiteralPath $modelRoot
          }

      # - name: Cache Hugging Face model
      #   uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
      #   with:
      #     path: ${{ env.MODEL_NAME }}/hf
      #     key: ${{ runner.os }}-hf-model-${{ github.sha }}
      #     restore-keys: |
      #       ${{ runner.os }}-hf-model-

      - name: Convert to ONNX - DirectML - INT4
        shell: powershell
        env:
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_READ }}
        run: |
          $modelRoot = Join-Path 'C:\models' $env:MODEL_NAME
          $hfDir = Join-Path $modelRoot 'hf'
          $onnxDir = Join-Path $modelRoot 'onnx'
          # The download step may have been skipped, so make sure the output
          # folder exists here as well.
          New-Item -ItemType Directory -Force -Path $onnxDir | Out-Null
          huggingface-cli login --token $env:HUGGINGFACE_TOKEN --add-to-git-credential
          python3 "onnxruntime-genai/src/python/py/models/builder.py" -i $hfDir -o $onnxDir -p $env:PRECISION -e $env:EXECUTOR
          # Keep the builder's status; logout must not hide a failed conversion.
          $buildExit = $LASTEXITCODE
          huggingface-cli logout
          if ($buildExit -ne 0) { exit $buildExit }

      - name: Upload to Hugging Face
        env:
          HUGGINGFACE_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN_WRITE }}
        run: |
          # Same folder the conversion step wrote to.
          $onnxDir = Join-Path (Join-Path 'C:\models' $env:MODEL_NAME) 'onnx'
          Get-ChildItem -Path $onnxDir -Force
          huggingface-cli login --token $env:HUGGINGFACE_TOKEN --add-to-git-credential
          huggingface-cli upload "${env:USER_NAME}/${env:TARGET_MODEL_ID}" $onnxDir . --revision "${env:SOURCE_MODEL_SIZE}-onnx"
          # Keep the upload status; logout must not hide a failed upload.
          $uploadExit = $LASTEXITCODE
          huggingface-cli logout
          if ($uploadExit -ne 0) { exit $uploadExit }

      - name: Cleanup
        if: always()
        run: |
          # MODEL_NAME is unset when an early step failed; nothing to clean then.
          if (-not $env:MODEL_NAME) { exit 0 }
          $modelRoot = Join-Path 'C:\models' $env:MODEL_NAME
          # Remove per-run output and scratch folders plus the workspace-relative
          # folder older revisions of this workflow downloaded into. The hf/
          # checkpoint is kept so the existence check can skip the next download.
          $targets = @(
            (Join-Path $modelRoot 'onnx'),
            (Join-Path $modelRoot 'cache'),
            $env:MODEL_NAME
          )
          foreach ($target in $targets) {
            # Guarded so a missing folder does not fail the job.
            if (Test-Path -LiteralPath $target) {
              Remove-Item -Recurse -Force -LiteralPath $target
            }
          }