test: CI convert llama3.1 #57
name: Convert model to ONNX
on:
  push:
    # Sequence of patterns matched against refs/heads
    branches:
      - 'chore/convert-onnx'
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        type: string
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: llama3.1"
        required: true
        type: string
# concurrency:
#   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
#   cancel-in-progress: true
env:
  USER_NAME: cortexso
  SOURCE_MODEL_ID: meta-llama/Meta-Llama-3.1-8B-Instruct #${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: 8b #${{ inputs.source_model_size }}
  TARGET_MODEL_ID: llama3.1 #${{ inputs.target_model_id }}
  PRECISION: int4 # Valid values: int4,fp16,fp32
  EXECUTOR: dml # Valid values: cpu,cuda,dml,web
  ONNXRUNTIME_GENAI_VERSION: 0.3.0 # Check version from: https://github.com/microsoft/onnxruntime-genai/releases
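# NOTE: the env values above are hardcoded for this test run and shadow the
# workflow_dispatch inputs (the commented-out ${{ inputs.* }} expressions).
# A dispatch-driven run could be triggered with the GitHub CLI, for example
# (assuming this file is saved as .github/workflows/convert-model-onnx.yml):
#   gh workflow run convert-model-onnx.yml \
#     -f source_model_id=meta-llama/Meta-Llama-3.1-8B-Instruct \
#     -f source_model_size=8B \
#     -f target_model_id=llama3.1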
jobs:
  converter:
    runs-on: windows-onnx
    steps:
      - name: Checkout
        uses: actions/checkout@v4 # v4.1.7
        with:
          submodules: recursive
      - name: Set up Python
        uses: actions/setup-python@v5 # v5.1.1
        with:
          python-version: '3.10'
          # architecture: 'x64'
      - name: Cache Python packages
        uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
        with:
          path: |
            ~/.cache/pip
            ~/.local/share/pip
            .venv
          key: ${{ runner.os }}-pip-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-pip-
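      # The cache key is per-commit (github.sha); restore-keys lets a fresh
      # commit seed from the most recent pip cache instead of starting cold.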
      - name: Install dependencies
        shell: powershell
        run: |
          # python.exe -m ensurepip
          # python.exe -m pip install --upgrade pip
          pip3 install -I --user huggingface_hub hf-transfer numpy==1.26.4 torch==2.3.1 transformers==4.43.4 onnx==1.16.1 onnxruntime==1.18.0 sentencepiece==0.2.0
          # if ($env:EXECUTOR -eq 'cpu') {
          #   pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'dml') {
          #   pip install --pre onnxruntime-genai-directml=="$env:ONNXRUNTIME_GENAI_VERSION"
          # } elseif ($env:EXECUTOR -eq 'cuda') {
          #   pip install --pre onnxruntime-genai-cuda=="$env:ONNXRUNTIME_GENAI_VERSION" --index-url=https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/
          # } else {
          #   Write-Host "Error: Unknown EXECUTOR value: $env:EXECUTOR"
          #   exit 1
          # }
          # python -m onnxruntime_genai.models.builder --help
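          # Note: the executor-specific onnxruntime-genai installs are left
          # commented out; the conversion step below instead calls builder.py
          # straight from the onnxruntime-genai submodule pulled in by the
          # recursive checkout above.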
      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          $SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
          $ADDR = $SOURCE_MODEL_ID -split '/'
          $MODEL_NAME = $ADDR[-1]
          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
          echo "MODEL_NAME=$MODEL_NAME_LOWER" >> $env:GITHUB_ENV
          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging
      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "PRECISION: ${{ env.PRECISION }}"
          echo "EXECUTOR: ${{ env.EXECUTOR }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"
      # - name: Prepare folders
      #   run: |
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
      #     mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache
      - name: Check file existence
        id: check_files
        uses: andstor/file-existence-action@v1
        with:
          files: "C:\\models\\${{ env.MODEL_NAME }}/hf"
      - name: Download Hugging Face model
        id: download_hf
        if: steps.check_files.outputs.files_exists == 'false'
        run: |
          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/hf
          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/onnx
          mkdir -p C:\\models\\${{ env.MODEL_NAME }}/cache
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          huggingface-cli download --repo-type model --local-dir C:\\models\\${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
          huggingface-cli logout
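          # Note: hf-transfer is installed in the dependency step, but
          # huggingface-cli only uses it when HF_HUB_ENABLE_HF_TRANSFER=1 is set;
          # as written, the download falls back to the default HTTP backend.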
      - name: Remove Failed Download
        if: failure() && steps.download_hf.outcome == 'failure'
        run: |
          Remove-Item -Recurse -Force -Path "C:\\models\\${{ env.MODEL_NAME }}"
      # - name: Cache Hugging Face model
      #   uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2
      #   with:
      #     path: C:\\models\\${{ env.MODEL_NAME }}/hf
      #     key: ${{ runner.os }}-hf-model-${{ github.sha }}
      #     restore-keys: |
      #       ${{ runner.os }}-hf-model-
      - name: Convert to ONNX - DirectML - INT4
        shell: powershell
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          python3 "onnxruntime-genai/src/python/py/models/builder.py" -i "C:\\models\\${{ env.MODEL_NAME }}/hf" -o "C:\\models\\${{ env.MODEL_NAME }}/onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }}
          huggingface-cli logout
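          # builder.py flags, per the onnxruntime-genai model builder README:
          #   -i  input folder holding the downloaded HF checkpoint
          #   -o  output folder for the generated ONNX model
          #   -p  precision (int4 here)   -e  execution provider (dml here)
          # The cache folder created earlier is not passed in; the builder also
          # accepts a cache-dir option (-c) that could point at it.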
      - name: Upload to Hugging Face
        run: |
          Get-ChildItem -Path "C:\\models\\${{ env.MODEL_NAME }}/onnx" -Force
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
          huggingface-cli upload ${{ env.USER_NAME }}/${{ env.TARGET_MODEL_ID }} "C:\\models\\${{ env.MODEL_NAME }}/onnx" . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
          huggingface-cli logout
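          # With the defaults above, this publishes the folder contents to the
          # repo root (".") of huggingface.co/cortexso/llama3.1 on a branch
          # named "8b-onnx" (SOURCE_MODEL_SIZE + "-onnx").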
      # - name: Cleanup
      #   if: always()
      #   run: |
      #     Remove-Item -Recurse -Force -Path "C:\\models\\${{ env.MODEL_NAME }}"