forked from furiosa-ai/inference
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #69 from furiosa-ai/add_setting_scripts
env setting 및 evaluation script 추가
- Loading branch information
Showing
5 changed files
with
462 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/bin/bash
#
# Build the MLPerf qllama2-70b environment from scratch:
#   STEP-1  recreate and activate the conda env (scripts/envs/*_env_quant.yml)
#   STEP-2  build and install the MLPerf loadgen Python package
#   STEP-3  pull the Llama-2-70b checkpoint, datasets and quantization data (dvc)
#
# Must be started from inside the git repository. Precondition failures stop
# the script with `return` when it is sourced and `exit` when it is executed,
# so a sourcing shell is never killed.

# define env. variables
model_name=qllama2-70b
model_dir=language/llama2-70b
if ! git_dir=$(git rev-parse --show-toplevel) || [[ -z "$git_dir" ]]; then
  printf 'error: not inside a git repository\n' >&2
  return 1 2>/dev/null || exit 1
fi
work_dir=$git_dir/$model_dir
data_dir=$git_dir/data
env_name=mlperf-$model_name
# Fall back to `conda` on PATH: with CONDA_EXE unset, a bare
# `$CONDA_EXE info --base` would run the unrelated `info` command.
if ! conda_base=$("${CONDA_EXE:-conda}" info --base) || [[ -z "$conda_base" ]]; then
  printf 'error: cannot determine the conda base directory\n' >&2
  return 1 2>/dev/null || exit 1
fi

# work on model directory
cd "$work_dir" || {
  printf 'error: cannot cd to %s\n' "$work_dir" >&2
  return 1 2>/dev/null || exit 1
}

# create and enter conda env.
printf "\n============= STEP-1: Create conda environment and activate =============\n"
# Best effort: the env may not exist yet, so a failure here is not fatal.
conda remove -n "$env_name" --all -y
# Clear leftovers of a previous env. Named envs live under <base>/envs by
# default (the former <base>/env path never matched anything).
rm -rf -- "$conda_base/envs/$env_name"
# env spec used for quantized-model multi-GPU inference experiments
conda env create -f "$git_dir/scripts/envs/${model_name}_env_quant.yml"

# nounset is switched off around conda's shell hooks, as before, but the
# caller's original setting is restored at the end instead of forcing it on.
case $- in
  *u*) restore_nounset=1 ;;
  *) restore_nounset=0 ;;
esac
set +u
source "$conda_base/etc/profile.d/conda.sh"
# Never continue into the installs below with the wrong env active.
conda activate "$env_name" || {
  printf 'error: cannot activate conda env %s\n' "$env_name" >&2
  if ((restore_nounset)); then set -u; fi
  return 1 2>/dev/null || exit 1
}

# build mlperf loadgen
printf "\n============= STEP-2: Build mlperf loadgen =============\n"
conda install pybind11==2.10.4 -c conda-forge -y
# Only install when the cd succeeded, otherwise pip would build the wrong dir.
cd "$git_dir/loadgen" && python -m pip install .

# pull model and dataset
printf "\n============= STEP-3: Pull dvc data =============\n"
# Quoted so the brackets are never treated as a glob pattern.
pip install 'dvc[s3]'
checkpoint_dir=$data_dir/models/llama2/Llama-2-70b-chat-hf
dvc pull "$checkpoint_dir"/model-0000*.dvc --force
dvc pull "$checkpoint_dir"/model-0001*.dvc --force
dvc pull "$checkpoint_dir"/pytorch_model-0000*.dvc --force
dvc pull "$checkpoint_dir"/pytorch_model-0001*.dvc --force
# Pull every remaining .dvc file of the checkpoint (everything that is not a
# weight shard). A plain loop replaces the former `!(...)` extglob pattern,
# which was a syntax error because `shopt -s extglob` was commented out.
remaining_dvc_files=()
for dvc_file in "$checkpoint_dir"/*.dvc; do
  [[ -e "$dvc_file" ]] || continue
  case "${dvc_file##*/}" in
    model-000* | pytorch_model-000*) continue ;;
  esac
  remaining_dvc_files+=("$dvc_file")
done
if ((${#remaining_dvc_files[@]} > 0)); then
  dvc pull "${remaining_dvc_files[@]}" --force
fi
dvc pull "$data_dir/dataset/open-orca/validation" --force
dvc pull "$data_dir/dataset/open-orca/calibration" --force
dvc pull "$data_dir/quantization/llama2.dvc" --force
# printf "\n============= End of build =============\n"

# exit from conda env.
conda deactivate
if ((restore_nounset)); then set -u; fi

# get back to git root
cd "$git_dir"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Conda environment "mlperf-qllama2-70b": Python 3.10 plus a fully pinned pip
# package set. Everything below `- pip:` is installed by pip, so those entries
# must stay nested under it.
name: mlperf-qllama2-70b
channels:
  - defaults
dependencies:
  - python~=3.10.0
  - pip:
      # Extra index that serves the +cu118 torch/torchaudio/torchvision wheels.
      - --extra-index-url https://download.pytorch.org/whl/cu118
      - torch==2.1.0+cu118
      - absl-py==2.1.0
      - accelerate==0.28.0
      - aiofiles==23.2.1
      - aiohttp==3.8.6
      - aiosignal==1.3.1
      - async-timeout==4.0.3
      - attrs==23.2.0
      - boto3==1.34.67
      - botocore==1.34.67
      - certifi==2024.2.2
      - charset-normalizer==3.3.2
      - click==8.1.7
      - cmake==3.28.3
      - coloredlogs==15.0.1
      - datasets==2.18.0
      - dill==0.3.8
      - evaluate==0.4.1
      - filelock==3.13.1
      - flatbuffers==24.3.7
      - frozenlist==1.4.1
      - fsspec==2024.2.0
      - furiosa-common==0.10.1
      # NOTE(review): this requirement looks mangled by e-mail obfuscation
      # ("<repo>.git@<ref>" collapsed into a placeholder) - restore the real
      # repository name and ref before using this file.
      - git+https://github.com/furiosa-ai/[email protected]
      - furiosa-optimizer==0.10.0
      - graphviz==0.20.3
      - huggingface-hub==0.21.4
      - humanfriendly==10.0
      - idna==3.6
      - Jinja2==3.1.3
      - jmespath==1.0.1
      - joblib==1.3.2
      - lit==18.1.1
      - markdown-it-py==3.0.0
      - MarkupSafe==2.1.5
      - mdurl==0.1.2
      # NOTE(review): mangled "<repo>.git@<ref>" requirement, same as above -
      # restore the real repository name and ref.
      - git+https://github.com/furiosa-ai/[email protected]
      - mpmath==1.3.0
      - multidict==6.0.5
      - multipledispatch==1.0.0
      - multiprocess==0.70.16
      - networkx==3.2.1
      - nltk==3.8.1
      - numpy==1.26.4
      - nvidia-cublas-cu12==12.1.3.1
      - nvidia-cuda-cupti-cu12==12.1.105
      - nvidia-cuda-nvrtc-cu12==12.1.105
      - nvidia-cuda-runtime-cu12==12.1.105
      - nvidia-cudnn-cu12==8.9.2.26
      - nvidia-cufft-cu12==11.0.2.54
      - nvidia-curand-cu12==10.3.2.106
      - nvidia-cusolver-cu12==11.4.5.107
      - nvidia-cusparse-cu12==12.1.0.106
      - nvidia-nccl-cu12==2.20.5
      - nvidia-nvjitlink-cu12==12.5.40
      - nvidia-nvtx-cu12==12.1.105
      - onnx==1.14.1
      - onnx-simplifier==0.4.36
      - onnxoptimizer==0.3.13
      - onnxruntime==1.15.1
      - packaging==24.0
      - pandas==2.2.1
      - pillow==10.2.0
      - protobuf==5.26.0
      - psutil==5.9.8
      - pyarrow==15.0.2
      - pyarrow-hotfix==0.6
      - pybind11==2.11.1
      - pydot==2.0.0
      - Pygments==2.17.2
      - pyparsing==3.1.2
      - python-dateutil==2.9.0.post0
      - pytz==2024.1
      - PyYAML==6.0.1
      - regex==2023.12.25
      - requests==2.31.0
      - responses==0.18.0
      - rich==13.7.1
      - rouge_score==0.1.2
      - ruamel.yaml==0.18.6
      - ruamel.yaml.clib==0.2.8
      - ruff==0.4.9
      - s3transfer==0.10.1
      - safetensors==0.4.2
      - scipy==1.10.1
      - sentencepiece==0.1.99
      - simplejson==3.19.2
      - six==1.16.0
      - sympy==1.12
      - timm==0.6.11
      - tokenizers==0.13.3
      # torch itself is pinned once, at the top of this list.
      - torchaudio==2.1.0+cu118
      - torchvision==0.16.0+cu118
      - tqdm==4.65.0
      - transformers==4.31.0
      - triton==2.1.0
      - typing==3.7.4.3
      - typing_extensions==4.10.0
      - tzdata==2024.1
      - urllib3==2.2.1
      - xxhash==3.4.1
      - yarl==1.9.4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Conda environment "mlperf-qllama2-70b" (variant that installs accelerate and
# transformers from furiosa-ai git forks pinned to exact commits): Python 3.10
# plus a fully pinned pip package set. Everything below `- pip:` is installed
# by pip, so those entries must stay nested under it.
name: mlperf-qllama2-70b
channels:
  - defaults
dependencies:
  - python~=3.10.0
  - pip:
      # Extra index that serves the +cu118 torch/torchaudio/torchvision wheels.
      - --extra-index-url https://download.pytorch.org/whl/cu118
      - torch==2.1.0+cu118
      - absl-py==2.1.0
      # accelerate comes from a fork pinned to a commit, not from PyPI.
      - git+https://github.com/furiosa-ai/accelerate-compression.git@4d7b404041834d35727064e5b1dcfcd060319ad6#egg=accelerate
      - aiofiles==23.2.1
      - aiohttp==3.8.6
      - aiosignal==1.3.1
      - async-timeout==4.0.3
      - attrs==23.2.0
      - boto3==1.34.67
      - botocore==1.34.67
      - certifi==2024.2.2
      - charset-normalizer==3.3.2
      - click==8.1.7
      - cmake==3.28.3
      - coloredlogs==15.0.1
      - datasets==2.18.0
      - dill==0.3.8
      - evaluate==0.4.1
      - filelock==3.13.1
      - flatbuffers==24.3.7
      - frozenlist==1.4.1
      - fsspec==2024.2.0
      - furiosa-common==0.10.1
      # NOTE(review): this requirement looks mangled by e-mail obfuscation
      # ("<repo>.git@<ref>" collapsed into a placeholder) - restore the real
      # repository name and ref before using this file.
      - git+https://github.com/furiosa-ai/[email protected]
      - furiosa-optimizer==0.10.0
      - graphviz==0.20.3
      - huggingface-hub==0.21.4
      - humanfriendly==10.0
      - idna==3.6
      - Jinja2==3.1.3
      - jmespath==1.0.1
      - joblib==1.3.2
      - lit==18.1.1
      - markdown-it-py==3.0.0
      - MarkupSafe==2.1.5
      - mdurl==0.1.2
      # NOTE(review): mangled "<repo>.git@<ref>" requirement, same as above -
      # restore the real repository name and ref.
      - git+https://github.com/furiosa-ai/[email protected]
      - mpmath==1.3.0
      - multidict==6.0.5
      - multipledispatch==1.0.0
      - multiprocess==0.70.16
      - networkx==3.2.1
      - nltk==3.8.1
      - numpy==1.26.4
      - nvidia-cublas-cu12==12.1.3.1
      - nvidia-cuda-cupti-cu12==12.1.105
      - nvidia-cuda-nvrtc-cu12==12.1.105
      - nvidia-cuda-runtime-cu12==12.1.105
      - nvidia-cudnn-cu12==8.9.2.26
      - nvidia-cufft-cu12==11.0.2.54
      - nvidia-curand-cu12==10.3.2.106
      - nvidia-cusolver-cu12==11.4.5.107
      - nvidia-cusparse-cu12==12.1.0.106
      - nvidia-nccl-cu12==2.20.5
      - nvidia-nvjitlink-cu12==12.5.40
      - nvidia-nvtx-cu12==12.1.105
      - onnx==1.14.1
      - onnx-simplifier==0.4.36
      - onnxoptimizer==0.3.13
      - onnxruntime==1.15.1
      - packaging==24.0
      - pandas==2.2.1
      - pillow==10.2.0
      - protobuf==5.26.0
      - psutil==5.9.8
      - pyarrow==15.0.2
      - pyarrow-hotfix==0.6
      - pybind11==2.11.1
      - pydot==2.0.0
      - Pygments==2.17.2
      - pyparsing==3.1.2
      - python-dateutil==2.9.0.post0
      - pytz==2024.1
      - PyYAML==6.0.1
      - regex==2023.12.25
      - requests==2.31.0
      - responses==0.18.0
      - rich==13.7.1
      - rouge_score==0.1.2
      - ruamel.yaml==0.18.6
      - ruamel.yaml.clib==0.2.8
      - ruff==0.4.9
      - s3transfer==0.10.1
      - safetensors==0.4.2
      - scipy==1.10.1
      - sentencepiece==0.1.99
      - simplejson==3.19.2
      - six==1.16.0
      - sympy==1.12
      - timm==0.6.11
      - tokenizers==0.13.3
      # torch itself is pinned once, at the top of this list.
      - torchaudio==2.1.0+cu118
      - torchvision==0.16.0+cu118
      - tqdm==4.65.0
      # transformers comes from a fork pinned to a commit, not from PyPI.
      - git+https://github.com/furiosa-ai/transformers-comp.git@2b012fcf15006e2cb2b0d9735ebf5b1d08a744a8#egg=transformers
      - triton==2.1.0
      - typing==3.7.4.3
      - typing_extensions==4.10.0
      - tzdata==2024.1
      - urllib3==2.2.1
      - xxhash==3.4.1
      - yarl==1.9.4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/bin/bash
#
# STEP-4: run the MLPerf accuracy evaluation of the quantized Llama-2-70b
# model inside the existing "mlperf-qllama2-70b" conda env, then score the
# produced accuracy log with evaluate-accuracy.py.
#
# Optional environment overrides (defaults in parentheses):
#   SCENARIO   (Offline)  scenario passed to main.py and used in the log path
#   DATA_TYPE  (quant)    value for --dtype; forced to float32 when DEVICE=cpu
#   N_COUNT    (24576)    value for --total-sample-count
#   DEVICE     (cuda:0)   value for --device
#
# Must be started from inside the git repository. Precondition failures stop
# the script with `return` when it is sourced and `exit` when it is executed,
# so a sourcing shell is never killed.

# define env. variables
model_name=qllama2-70b
model_dir=language/llama2-70b
if ! git_dir=$(git rev-parse --show-toplevel) || [[ -z "$git_dir" ]]; then
  printf 'error: not inside a git repository\n' >&2
  return 1 2>/dev/null || exit 1
fi
work_dir=$git_dir/$model_dir
data_dir=$git_dir/data
quant_data_dir=$data_dir/quantization/llama2
log_dir=$git_dir/logs
env_name=mlperf-$model_name
# Fall back to `conda` on PATH: with CONDA_EXE unset, a bare
# `$CONDA_EXE info --base` would run the unrelated `info` command.
if ! conda_base=$("${CONDA_EXE:-conda}" info --base) || [[ -z "$conda_base" ]]; then
  printf 'error: cannot determine the conda base directory\n' >&2
  return 1 2>/dev/null || exit 1
fi

# work on model directory (main.py and user.conf are resolved relative to it)
cd "$work_dir" || {
  printf 'error: cannot cd to %s\n' "$work_dir" >&2
  return 1 2>/dev/null || exit 1
}

# enter existing conda env.
source "$conda_base/etc/profile.d/conda.sh"
conda activate "$env_name" || {
  printf 'error: cannot activate conda env %s\n' "$env_name" >&2
  return 1 2>/dev/null || exit 1
}

# eval model
printf "\n============= STEP-4: Run eval =============\n"
SCENARIO=${SCENARIO:-Offline}
DATA_TYPE=${DATA_TYPE:-quant}
N_COUNT=${N_COUNT:-24576} # total_len = 24,576
DEVICE=${DEVICE:-cuda:0}

if [[ "$DEVICE" == "cpu" ]]; then
  DATA_TYPE=float32
fi

# quantization args
QUANT_CONFIG_PATH=$quant_data_dir/quant_config.yaml
QUANT_PARAM_PATH=$quant_data_dir/quant_param_golden.npy
QUANT_FORMAT_PATH=$quant_data_dir/quant_format_golden.yaml
MODEL_SOURCE=furiosa_llm_rope
QUANT_DATATYPE=W8A8KV8

# Values are passed as printf arguments so a '%' or '\' in an override cannot
# corrupt the format string.
printf "<<EVAL_CONFIG>>\n"
printf '\tSCENARIO: %s\n' "$SCENARIO"
printf '\tDATA_TYPE: %s\n' "$DATA_TYPE"
printf '\tNUM_DATA: %s\n' "$N_COUNT"
printf '\tDEVICE: %s\n' "$DEVICE"

# Runs with fewer than 2000 samples use the internal test config.
if ((N_COUNT < 2000)); then
  USER_CONF=$git_dir/internal_test.conf
else
  USER_CONF=user.conf
fi

CHECKPOINT_PATH=$data_dir/models/llama2/Llama-2-70b-chat-hf
DATASET_PATH=$data_dir/dataset/open-orca/validation/open_orca_gpt4_tokenized_llama.sampled_24576.pkl
LOG_PATH=$log_dir/$model_name/$SCENARIO/$QUANT_DATATYPE/$(date +%Y%m%d_%H%M%S%Z)
# The redirections below need the directory even if main.py fails before
# creating it.
mkdir -p -- "$LOG_PATH"

# SECONDS is bash's built-in elapsed-time counter; reset it to time the run.
SECONDS=0
# --scenario follows $SCENARIO (default Offline) so the run always matches the
# printed config and the scenario component of LOG_PATH.
python -u main.py --scenario "$SCENARIO" \
  --model-path "$CHECKPOINT_PATH" \
  --mlperf-conf ../../mlperf.conf \
  --user-conf "$USER_CONF" \
  --total-sample-count "$N_COUNT" \
  --device "$DEVICE" \
  --dataset-path "$DATASET_PATH" \
  --dtype "$DATA_TYPE" \
  --accuracy \
  --output-log-dir "$LOG_PATH" \
  --quantize \
  --quant_config_path "$QUANT_CONFIG_PATH" \
  --quant_param_path "$QUANT_PARAM_PATH" \
  --quant_format_path "$QUANT_FORMAT_PATH" \
  --model_source "$MODEL_SOURCE"

duration=$SECONDS
printf '%d minutes and %d seconds elapsed.' "$((duration / 60))" "$((duration % 60))" \
  >"$LOG_PATH/elapsed_time.log"

ACCURACY_LOG_FILE=$LOG_PATH/mlperf_log_accuracy.json
python evaluate-accuracy.py --checkpoint-path "$CHECKPOINT_PATH" \
  --mlperf-accuracy-file "$ACCURACY_LOG_FILE" \
  --dataset-file "$DATASET_PATH" --dtype int64 \
  &>"$LOG_PATH/accuracy_result.log"
printf 'Save eval log to %s' "$LOG_PATH"

printf "\n============= End of eval =============\n"

# unset exported env. variables
unset SCENARIO
unset DATA_TYPE
unset N_COUNT
unset DEVICE

# exit from conda env.
conda deactivate

# get back to git root
cd "$git_dir"
Oops, something went wrong.