Skip to content

Commit

Permalink
Add E2E CI (#582)
Browse files Browse the repository at this point in the history
* add e2e CI

* modify the card id

* solve the concurrency issue

* add dependency for torchbench

* add dependency for torchbench

* modify the github.workspace path

* activate oneAPI

* git submodule update

* change triton path

* update triton build

* update triton commit

* modify test scope and yml name

* change the install e2e suites path

* change the install e2e suites path

* change use path

* change use path

* change use path

* add bash for e2e install

* add bash for e2e install

* add bash for e2e install

* add composite

* modify the e2e running way

* add inductor_xpu_test.sh

* modify the E2E test scripts

---------

Co-authored-by: Zhong, Ruijie <[email protected]>
  • Loading branch information
Stonepia and RUIJIEZHONG66166 authored Apr 4, 2024
1 parent 89eff3a commit ed69bf5
Show file tree
Hide file tree
Showing 8 changed files with 391 additions and 0 deletions.
104 changes: 104 additions & 0 deletions .github/actions/inductor-xpu-e2e-test/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Composite action: run one Inductor XPU E2E suite/dtype/mode/scenario
# combination, summarize the CSV results, upload them, and optionally
# enforce a minimum pass count.
name: inductor-xpu-e2e-test

inputs:
  suite:
    required: true
    type: string
    default: "huggingface"
    description: Dynamo benchmarks test suite, huggingface / timm_models / torchbench
  dt:
    required: true
    type: string
    default: "float32"
    description: Data precision of the test. float32 / bfloat16 / float16 / amp_fp16 / amp_bf16
  mode:
    required: true
    type: string
    default: "inference"
    description: inference / training test
  scenario:
    required: true
    type: string
    default: "accuracy"
    description: accuracy / performance test
  cards:
    required: false
    type: string
    default: "all"
    description: which cards can be used in the test
  expected_pass_num:
    required: false
    type: number
    description: for result check

runs:
  using: composite
  steps:
    - name: E2E Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
      shell: bash
      run: |
        source activate e2e_ci
        cp .github/scripts/inductor_xpu_test.sh ../pytorch
        cd ../pytorch
        # BUGFIX: the context is `inputs.*`, not `input.*` — the old expression
        # expanded to an empty string, so suite-specific deps never installed.
        if [[ "${{ inputs.suite }}" == "timm_models" ]]; then
          pip install --no-deps "git+https://github.com/rwightman/pytorch-image-models@b9d43c7dcac1fe05e851dd7be7187b108af593d2"
        elif [[ "${{ inputs.suite }}" == "torchbench" ]]; then
          pip install transformers==4.38.1 --no-deps
          pip install timm==0.9.7 --no-deps
          apt-get update -y
          # use apt-get consistently (apt warns about an unstable CLI in scripts)
          apt-get install -y libgl1-mesa-glx
          conda install -y git-lfs pyyaml pandas scipy psutil
          pip install tqdm pandas pyre-extensions torchrec tensorboardX dalle2_pytorch torch_geometric scikit-image matplotlib gym fastNLP doctr matplotlib opacus python-doctr higher opacus dominate kaldi-io librosa effdet pycocotools diffusers
          # reinstall to resolve a pyarrow/pandas ABI conflict pulled in above
          pip uninstall -y pyarrow pandas
          pip install pyarrow pandas
          cd ..
          git clone https://github.com/facebookresearch/detectron2.git
          python -m pip install -e detectron2
          git clone --recursive https://github.com/facebookresearch/multimodal.git multimodal
          pushd multimodal
          pip install -e .
          popd
        fi
        #TRANSFORMERS_COMMIT=$(cat .ci/docker/ci_commit_pins/huggingface.txt)
        #pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_COMMIT}
        source /opt/intel/oneapi/setvars.sh
        #export PYTORCH_ENABLE_XPU_FALLBACK=1
        rm -rf inductor_log
        bash inductor_xpu_test.sh ${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }} xpu 3
    - name: Test Results Overview (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
      shell: bash
      run: |
        set +e
        cd ../pytorch/inductor_log/${{ inputs.suite }}
        cd ${{ inputs.dt }}
        echo -e "============ Summary for ${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }} ============" | tee -a ./${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_${{ inputs.scenario }}_e2e_summary.log
        # de-duplicate repeated CSV rows in place before counting
        awk -i inplace '!seen[$0]++' inductor_${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_xpu_${{ inputs.scenario }}.csv
        csv_lines=$(cat inductor_${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_xpu_${{ inputs.scenario }}.csv | wc -l)
        let num_total=csv_lines-1
        # NOTE(review): counts any line containing "pass" — assumes status
        # strings other than pass/fail never embed the substring "pass"
        num_passed=$(grep -c "pass" inductor_${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_xpu_${{ inputs.scenario }}.csv)
        let num_failed=num_total-num_passed
        pass_rate=`awk 'BEGIN{printf "%.2f%%\n",('$num_passed'/'$num_total')*100}'`
        echo "num_total: $num_total" | tee -a ./${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_${{ inputs.scenario }}_e2e_summary.log
        echo "num_passed: $num_passed" | tee -a ./${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_${{ inputs.scenario }}_e2e_summary.log
        echo "num_failed: $num_failed" | tee -a ./${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_${{ inputs.scenario }}_e2e_summary.log
        echo "pass_rate: $pass_rate" | tee -a ./${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_${{ inputs.scenario }}_e2e_summary.log
        cd ${{ github.workspace }} && cp -r ../pytorch/inductor_log .
    - name: Upload Inductor XPU E2E CI Data (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
      uses: actions/upload-artifact@v4
      with:
        name: Inductor-XPU-E2E-CI-Data-${{ inputs.suite }}-${{ inputs.dt }}-${{ inputs.mode }}-${{ inputs.scenario }}-${{ github.event.pull_request.number || github.ref }}
        path: ${{ github.workspace }}/../pytorch/inductor_log
    - name: Test Results Check (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
      if: ${{ inputs.expected_pass_num }}
      shell: bash
      run: |
        cd ../pytorch/inductor_log/${{ inputs.suite }}
        cd ${{ inputs.dt }}
        # strip everything up to the colon, then ALL non-numeric chars (/g —
        # the old single substitution only removed the first one)
        num_passed=$(grep "num_passed:" ${{ inputs.suite }}_${{ inputs.dt }}_${{ inputs.mode }}_${{ inputs.scenario }}_e2e_summary.log | sed -e 's/.*://;s/[^0-9.]//g')
        # quote to avoid a [ syntax error when the grep finds nothing
        if [ "${num_passed:-0}" -lt ${{ inputs.expected_pass_num }} ]; then
          echo -e "[ERROR] Inductor E2E CI test for ${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} passed_num < ${{ inputs.expected_pass_num }}"
          exit 1
        fi
1 change: 1 addition & 0 deletions .github/ci_commit_pins/benchmark.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fb0dfed4c8c8ab1c9816b02832f7a99d86ee4ca5
1 change: 1 addition & 0 deletions .github/ci_commit_pins/triton.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
7f56d84c8e70e2aa802ceafb07b5890161508d81
61 changes: 61 additions & 0 deletions .github/scripts/inductor-xpu-e2e.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
# Install E2E benchmark dependencies, run the Inductor XPU E2E test, and
# summarize results into a per-suite log.
#
# Positional args (all optional):
#   $1 SUITE              huggingface / timm_models / torchbench (default: huggingface)
#   $2 DT                 data precision                          (default: amp_bf16)
#   $3 MODE               inference / training                    (default: inference)
#   $4 SCENARIO           accuracy / performance                  (default: accuracy)
#   $5 expected_pass_num  reserved for results-check.sh           (default: 46)
SUITE=${1:-huggingface}
DT=${2:-amp_bf16}
MODE=${3:-inference}
SCENARIO=${4:-accuracy}
expected_pass_num=${5:-46}

echo -e "========================================================================="
echo -e "Dependency Install"
echo -e "========================================================================="
source activate e2e_ci
cp .github/scripts/inductor_xpu_test.sh ../pytorch
cd ../pytorch
# BUGFIX: suite name and repo URL were mangled ("timm_MODEls"), so the timm
# branch never matched and the pip URL pointed at a nonexistent repository.
if [[ ${SUITE} == "timm_models" ]]; then
    pip install --no-deps "git+https://github.com/rwightman/pytorch-image-models@b9d43c7dcac1fe05e851dd7be7187b108af593d2"
elif [[ ${SUITE} == "torchbench" ]]; then
    pip install transformers==4.38.1 --no-deps
    pip install timm==0.9.7 --no-deps
    apt-get update -y
    apt install libgl1-mesa-glx -y
    conda install -y git-lfs pyyaml pandas scipy psutil
    pip install tqdm pandas pyre-extensions torchrec tensorboardX dalle2_pytorch torch_geometric scikit-image matplotlib gym fastNLP doctr matplotlib opacus python-doctr higher opacus dominate kaldi-io librosa effdet pycocotools diffusers
    # reinstall to resolve a pyarrow/pandas ABI conflict pulled in above
    pip uninstall -y pyarrow pandas
    pip install pyarrow pandas

    cd ..
    git clone https://github.com/facebookresearch/detectron2.git
    python -m pip install -e detectron2

    git clone --recursive https://github.com/facebookresearch/multimodal.git multimodal
    pushd multimodal
    pip install -e .
    popd
fi

#TRANSFORMERS_COMMIT=$(cat .ci/docker/ci_commit_pins/huggingface.txt)
#pip install --force-reinstall git+https://github.com/huggingface/transformers@${TRANSFORMERS_COMMIT}
echo -e "========================================================================="
echo -e "E2E Test"
echo -e "========================================================================="
source /opt/intel/oneapi/setvars.sh
# SECURITY: never hard-code tokens in the repository (the previous revision
# committed a live Hugging Face token). Supply it via the CI environment
# (e.g. a repository secret exported as HUGGING_FACE_HUB_TOKEN).
if [[ -z "${HUGGING_FACE_HUB_TOKEN:-}" ]]; then
    echo "[WARN] HUGGING_FACE_HUB_TOKEN is not set; gated model downloads may fail"
fi
#export PYTORCH_ENABLE_XPU_FALLBACK=1
rm -rf inductor_log
bash inductor_xpu_test.sh ${SUITE} ${DT} ${MODE} ${SCENARIO} xpu 3

echo -e "========================================================================="
echo -e "Test Results Summary"
echo -e "========================================================================="
cd ../pytorch/inductor_log/${SUITE}
cd ${DT}
echo -e "============ Summary for ${SUITE} ${DT} ${MODE} ${SCENARIO} ============" | tee -a ./${SUITE}_${DT}_${MODE}_${SCENARIO}_e2e_summary.log
# de-duplicate repeated CSV rows in place before counting
awk -i inplace '!seen[$0]++' inductor_${SUITE}_${DT}_${MODE}_xpu_${SCENARIO}.csv
csv_lines=$(cat inductor_${SUITE}_${DT}_${MODE}_xpu_${SCENARIO}.csv | wc -l)
let num_total=csv_lines-1
num_passed=$(grep -c "pass" inductor_${SUITE}_${DT}_${MODE}_xpu_${SCENARIO}.csv)
let num_failed=num_total-num_passed
#pass_rate=`awk 'BEGIN{printf "%.2f%%\n",('$num_passed'/'$num_total')*100}'`
echo "num_total: $num_total" | tee -a ./${SUITE}_${DT}_${MODE}_${SCENARIO}_e2e_summary.log
echo "num_passed: $num_passed" | tee -a ./${SUITE}_${DT}_${MODE}_${SCENARIO}_e2e_summary.log
echo "num_failed: $num_failed" | tee -a ./${SUITE}_${DT}_${MODE}_${SCENARIO}_e2e_summary.log
#echo "pass_rate: $pass_rate" | tee -a ./${SUITE}_${DT}_${MODE}_${SCENARIO}_e2e_summary.log
58 changes: 58 additions & 0 deletions .github/scripts/inductor_xpu_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#! /bin/bash
# This script works for xpu / cuda device inductor tests.
#
# Positional args:
#   $1 SUITE       huggingface / torchbench / timm_models
#   $2 DT          float32 / float16 / amp_bf16 / amp_fp16
#   $3 MODE        inference / training
#   $4 SCENARIO    accuracy / performance
#   $5 DEVICE      xpu / cuda
#   $6 CARD        0 / 1 / 2 / 3 ...
#   $7 SHAPE       static / dynamic
#   $8 NUM_SHARDS  number of test shards (optional)
#   $9 SHARD_ID    shard id (optional)
#   $10 MODEL_ONLY run a single model, e.g. GoogleFnet / T5Small (optional)

SUITE=${1:-huggingface} # huggingface / torchbench / timm_models
DT=${2:-float32} # float32 / float16 / amp_bf16 / amp_fp16
MODE=${3:-inference} # inference / training
SCENARIO=${4:-accuracy} # accuracy / performance
DEVICE=${5:-xpu} # xpu / cuda
CARD=${6:-0} # 0 / 1 / 2 / 3 ...
SHAPE=${7:-static} # static / dynamic
NUM_SHARDS=${8} # num test shards
SHARD_ID=${9} # shard id
MODEL_ONLY=${10} # GoogleFnet / T5Small

WORKSPACE=`pwd`
LOG_DIR=$WORKSPACE/inductor_log/${SUITE}/${DT}
mkdir -p ${LOG_DIR}
LOG_NAME=inductor_${SUITE}_${DT}_${MODE}_${DEVICE}_${SCENARIO}

Model_only_extra=""
if [[ -n "$MODEL_ONLY" ]]; then
    echo "Testing model ${MODEL_ONLY}"
    Model_only_extra="--only ${MODEL_ONLY}"
fi

Cur_Ver=`pip list | grep "^torch " | awk '{print $2}' | cut -d"+" -f 1`
# BUGFIX: use version sort (-V); a plain lexical sort mis-orders versions
# such as 2.10.0 vs 2.2.0. If torch <= 2.0.2, no extra flags; otherwise
# default to inference+freezing (PT 2.1+ behavior).
if [ "$(printf "${Cur_Ver}\n2.0.2" | sort -V | head -1)" = "${Cur_Ver}" ]; then
    Mode_extra="";
else
    # For PT 2.1
    Mode_extra="--inference --freezing ";
fi
if [[ $MODE == "training" ]]; then
    echo "Testing with training mode."
    Mode_extra="--training "
fi

Shape_extra=""
if [[ $SHAPE == "dynamic" ]]; then
    echo "Testing with dynamic shapes."
    Shape_extra="--dynamic-shapes --dynamic-batch-only "
fi

partition_flags=""
if [[ -n "$NUM_SHARDS" && -n "$SHARD_ID" ]]; then
    partition_flags="--total-partitions $NUM_SHARDS --partition-id $SHARD_ID "
fi

ulimit -n 1048576
# Collapse the three near-identical invocations: only the precision flag
# (and the INDUCTOR_AMP_DT export for amp_fp16) differs between branches.
if [[ $DT == "amp_bf16" ]]; then
    DT_flag="--amp"
elif [[ $DT == "amp_fp16" ]]; then
    export INDUCTOR_AMP_DT=float16
    DT_flag="--amp"
else
    DT_flag="--${DT}"
fi
ZE_AFFINITY_MASK=${CARD} python benchmarks/dynamo/${SUITE}.py --${SCENARIO} ${DT_flag} -d${DEVICE} -n10 --no-skip --dashboard ${Mode_extra} ${Shape_extra} ${partition_flags} ${Model_only_extra} --backend=inductor --timeout=4800 --output=${LOG_DIR}/${LOG_NAME}.csv 2>&1 | tee ${LOG_DIR}/${LOG_NAME}.log
68 changes: 68 additions & 0 deletions .github/scripts/install-e2e-suites/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Composite action: build and install the PyTorch domain libraries
# (vision / text / audio) at their pinned commits, plus the benchmark suite,
# into the e2e_ci conda environment. Each step clones into the directory
# ABOVE the workspace and reuses an existing checkout when present.
name: install-e2e-suites

runs:
  using: composite
  steps:
    - name: Torchvision Install
      shell: bash
      run: |
        source activate e2e_ci
        cd ../pytorch
        TORCH_VISION_PIN_COMMIT=$(cat .github/ci_commit_pins/vision.txt)
        cd ..
        if [ ! -d "vision" ]; then
          git clone --recursive https://github.com/pytorch/vision.git
        fi
        cd vision
        git checkout ${TORCH_VISION_PIN_COMMIT}
        conda install -y libpng jpeg
        # TODO: We use an older version ffmpeg to avoid the vision capability issue.
        conda install -y -c conda-forge 'ffmpeg<4.4'
        python setup.py install
        cd ..
    - name: Torchtext Install
      shell: bash
      run: |
        source activate e2e_ci
        cd ../pytorch
        TORCH_TEXT_PIN_COMMIT=$(cat .github/ci_commit_pins/text.txt)
        cd ..
        if [ ! -d "text" ]; then
          git clone --recursive https://github.com/pytorch/text.git
        fi
        # Torchtext
        cd text
        git checkout ${TORCH_TEXT_PIN_COMMIT}
        python setup.py clean install
        cd ..
    - name: Torchaudio Install
      shell: bash
      run: |
        source activate e2e_ci
        cd ../pytorch
        TORCH_AUDIO_PIN_COMMIT=$(cat .github/ci_commit_pins/audio.txt)
        cd ..
        # Torch audio
        if [ ! -d "audio" ]; then
          git clone --recursive https://github.com/pytorch/audio.git
        fi
        cd audio
        # Optionally `git checkout {pinned_commit}`
        # git checkout ${TORCH_AUDIO_PIN_COMMIT} break in pinned_commit
        python setup.py install
        cd ..
    - name: Benchmark Install
      shell: bash
      run: |
        source activate e2e_ci
        BENCHMARK_PINNED_COMMIT=$(cat .github/ci_commit_pins/benchmark.txt)
        cd ..
        if [ ! -d "benchmark" ]; then
          git clone --recursive https://github.com/weishi-deng/benchmark
        fi
        cd benchmark
        git checkout ${BENCHMARK_PINNED_COMMIT}
        python install.py
        pip install -e .
17 changes: 17 additions & 0 deletions .github/scripts/results-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# Check the E2E summary log and fail the job when fewer tests passed than
# expected.
#
# Positional args (all optional):
#   $1 SUITE              benchmark suite        (default: huggingface)
#   $2 DT                 data precision         (default: amp_bf16)
#   $3 MODE               inference / training   (default: inference)
#   $4 SCENARIO           accuracy / performance (default: accuracy)
#   $5 expected_pass_num  minimum pass count     (default: 46)
SUITE=${1:-huggingface}
DT=${2:-amp_bf16}
MODE=${3:-inference}
SCENARIO=${4:-accuracy}
expected_pass_num=${5:-46}

echo -e "========================================================================="
echo -e "Results Check"
echo -e "========================================================================="

cd ../pytorch/inductor_log/${SUITE}
cd ${DT}
# Strip everything up to the colon, then ALL non-numeric characters (/g —
# the old single substitution only removed the first one).
num_passed=$(grep "num_passed:" ${SUITE}_${DT}_${MODE}_${SCENARIO}_e2e_summary.log | sed -e 's/.*://;s/[^0-9.]//g')
# Quote and default to 0 so a missing/empty match fails the check instead of
# producing a `[: -lt: unary operator expected` syntax error.
if [ "${num_passed:-0}" -lt ${expected_pass_num} ]; then
    echo -e "[ERROR] Inductor E2E CI test for ${SUITE} ${DT} ${MODE} passed_num < ${expected_pass_num}"
    exit 1
fi
81 changes: 81 additions & 0 deletions .github/workflows/inductor_xpu_e2e_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# E2E CI: build PyTorch + IPEX + Triton from pinned sources on a self-hosted
# XPU runner, install the benchmark suites, run the Huggingface amp_bf16
# inference accuracy test, and check the pass count.
name: E2E CI Tests

on:
  workflow_dispatch:
  pull_request:
    branches: [dev/triton-test-3.0]
  merge_group:
    branches: [dev/triton-test-3.0]
    types: [checks_requested]

permissions: read-all

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  Inductor-XPU-E2E-CI-Tests:
    runs-on: [self-hosted, Inductor_test]
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Prepare Conda ENV
        run: |
          which conda
          # drop any stale env, then recreate; the grep guard is defensive in
          # case removal was skipped or partially failed
          conda remove -yn e2e_ci --all
          if conda env list | grep -q "^e2e_ci "; then source activate e2e_ci; else conda create -n e2e_ci python=3.9 cmake ninja -y; fi
          conda install intel::mkl-static intel::mkl-include -y
          pip install pandas scipy tqdm
      - name: Prepare Pytorch
        run: |
          source activate e2e_ci
          pwd
          cd ../ && rm -rf pytorch
          git clone -b dev/triton-test-3.0 https://github.com/Stonepia/pytorch.git pytorch
          cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
          conda install -c conda-forge libstdcxx-ng -y
          pip install pyyaml
          pip install -r requirements.txt
          python setup.py develop
      - name: Prepare IPEX
        run: |
          source activate e2e_ci
          source /opt/intel/oneapi/setvars.sh
          python -c "import torch;print(f'torch version {torch.__version__}')"
          python -m pip uninstall intel_extension_for_pytorch -y
          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
          cd ${{ github.workspace }}
          git submodule sync && git submodule update --init --recursive --jobs 0
          python -m pip install -r requirements.txt
          python setup.py bdist_wheel
          pip install --force-reinstall dist/*.whl
      - name: Triton Installation
        run: |
          source activate e2e_ci
          cd ${{ github.workspace }}
          TRITON_PINNED_COMMIT=$(cat .github/ci_commit_pins/triton.txt)
          # BUGFIX: define TRITON_REPO before echoing it — the old first echo
          # printed an empty repo; the duplicated echo is dropped.
          TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
          echo ${TRITON_REPO}@${TRITON_PINNED_COMMIT}
          cd ..
          pip install --force-reinstall "git+${TRITON_REPO}@${TRITON_PINNED_COMMIT}#subdirectory=python"
      - name: E2E dependency install
        uses: ./.github/scripts/install-e2e-suites

      - name: Huggingface AMP_BF16 Inference Accuracy Test
        run: |
          bash .github/scripts/inductor-xpu-e2e.sh
      - name: Upload Triton Inductor E2E CI Data
        # v4: v3 is deprecated, and v4 matches the version used by the
        # inductor-xpu-e2e-test action in this repo
        uses: actions/upload-artifact@v4
        with:
          name: Triton-Inductor-E2E-CI-Data
          # workspace-relative instead of a hard-coded runner home path, so the
          # workflow is not tied to one runner's directory layout
          path: ${{ github.workspace }}/../pytorch/inductor_log

      - name: Test Result Check
        run: |
          bash .github/scripts/results-check.sh

0 comments on commit ed69bf5

Please sign in to comment.