Convert model to ONNX #3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Convert model to ONNX

on:
  # Manual trigger only — no push/PR triggers are defined.
  workflow_dispatch:
    inputs:
      source_model_id:
        description: "Source HuggingFace model ID to pull. For ex: meta-llama/Meta-Llama-3.1-8B-Instruct"
        required: true
        type: string  # added: siblings declare type; this input was missing it
      source_model_size:
        description: "The model size. For ex: 8B"
        required: true
        type: string
      target_model_id:
        description: "Target HuggingFace model ID to push. For ex: cortexso/llama3.1"
        required: true
        type: string

env:
  USER_NAME: jan-hq
  SOURCE_MODEL_ID: ${{ inputs.source_model_id }}
  SOURCE_MODEL_SIZE: ${{ inputs.source_model_size }}
  TARGET_MODEL_ID: ${{ inputs.target_model_id }}
  # Valid precision + execution provider combinations are:
  # FP32 CPU, FP32 CUDA, FP16 CUDA, FP16 DML, INT4 CPU, INT4 CUDA, INT4 DML
  PRECISION: INT4
  EXECUTOR: DML
  # Check latest version from: https://github.com/microsoft/onnxruntime-genai/releases
  ONNXRUNTIME_GENAI_VERSION: "0.3.0"  # quoted so no YAML loader ever retypes it
jobs:
  converter:
    # DML execution provider requires a Windows runner.
    runs-on: windows-latest
    steps:
      - name: Checkout
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          submodules: recursive

      - name: Set up Python
        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
        with:
          python-version: "3.12.4"
          cache: 'pip' # caching pip dependencies

      - name: Install dependencies
        run: |
          python.exe -m pip install --upgrade pip
          pip install huggingface_hub hf-transfer fire
          pip install torch transformers onnx onnxruntime sentencepiece
          pip install --pre onnxruntime-genai=="$env:ONNXRUNTIME_GENAI_VERSION"

      # Derive MODEL_NAME (lowercased repo name) from the "org/repo" model ID
      # and export it for later steps via GITHUB_ENV.
      - name: Extract MODEL_NAME
        shell: powershell
        run: |
          $SOURCE_MODEL_ID = "${{ env.SOURCE_MODEL_ID }}"
          $ADDR = $SOURCE_MODEL_ID -split '/'
          $MODEL_NAME = $ADDR[-1]
          $MODEL_NAME_LOWER = $MODEL_NAME.ToLower()
          echo "MODEL_NAME=$MODEL_NAME_LOWER" >> $env:GITHUB_ENV
          echo "MODEL_NAME_LOWER=$MODEL_NAME_LOWER" # For debugging

      - name: Print environment variables
        run: |
          echo "SOURCE_MODEL_ID: ${{ env.SOURCE_MODEL_ID }}"
          echo "PRECISION: ${{ env.PRECISION }}"
          echo "EXECUTOR: ${{ env.EXECUTOR }}"
          echo "MODEL_NAME: ${{ env.MODEL_NAME }}"

      # NOTE(review): default shell on windows-latest is pwsh, where `-p` binds
      # as -Path on the mkdir wrapper — works, but not the POSIX -p flag.
      - name: Prepare folders
        run: |
          mkdir -p ${{ env.MODEL_NAME }}/hf
          mkdir -p ${{ env.MODEL_NAME }}/onnx
          mkdir -p ${{ env.MODEL_NAME }}/cache

      - name: Download Hugging Face model
        run: |
          huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_READ }} --add-to-git-credential
          huggingface-cli download --repo-type model --local-dir ${{ env.MODEL_NAME }}/hf ${{ env.SOURCE_MODEL_ID }}
          huggingface-cli logout

      - name: Convert to ONNX - DirectML - INT4
        run: |
          python -m onnxruntime_genai.models.builder -m "${{ env.MODEL_NAME }}/hf" -o "${{ env.MODEL_NAME }}/onnx" -p ${{ env.PRECISION }} -e ${{ env.EXECUTOR }}

      # Upload step kept disabled, but with two latent bugs fixed so it works
      # when re-enabled:
      #  - the revision used undefined MODEL_SIZE; the defined var is SOURCE_MODEL_SIZE
      #  - TARGET_MODEL_ID already contains the org (input example: "cortexso/llama3.1"),
      #    so prefixing USER_NAME would produce an invalid "org/org/repo" path
      # - name: Upload to Hugging Face
      #   run: |
      #     Get-ChildItem -Path "${{ env.MODEL_NAME }}/onnx" -Force
      #     huggingface-cli login --token ${{ secrets.HUGGINGFACE_TOKEN_WRITE }} --add-to-git-credential
      #     huggingface-cli upload ${{ env.TARGET_MODEL_ID }} ${{ env.MODEL_NAME }}/onnx . --revision "${{ env.SOURCE_MODEL_SIZE }}-onnx"
      #     huggingface-cli logout

      # Free runner disk space even when earlier steps fail.
      - name: Cleanup
        if: always()
        run: |
          Remove-Item -Recurse -Force -Path "${{ env.MODEL_NAME }}"