Merge pull request #69 from furiosa-ai/add_setting_scripts
Add env setting and evaluation scripts
BeomGeunCho authored Jul 1, 2024
2 parents bece5ce + 863da2c commit c12fac0
Showing 5 changed files with 462 additions and 0 deletions.
48 changes: 48 additions & 0 deletions scripts/build_qllama2-70b_env.sh
@@ -0,0 +1,48 @@
#!/bin/bash

# define env. variables
model_name=qllama2-70b
model_dir=language/llama2-70b
git_dir=$(git rev-parse --show-toplevel)
work_dir=$git_dir/$model_dir
data_dir=$git_dir/data
env_name=mlperf-$model_name
conda_base=$($CONDA_EXE info --base)

# work on model directory
cd $work_dir

# create and enter conda env.
printf "\n============= STEP-1: Create conda environment and activate =============\n"
conda remove -n $env_name --all -y
rm -rf $conda_base/envs/$env_name
conda env create -f $git_dir/scripts/envs/${model_name}_env_quant.yml  # for experimenting with quant-model multi-GPU inference
set +u
source "$conda_base/etc/profile.d/conda.sh"
conda activate $env_name
set -u

# build mlperf loadgen
printf "\n============= STEP-2: Build mlperf loadgen =============\n"
conda install pybind11==2.10.4 -c conda-forge -y
cd $git_dir/loadgen; python -m pip install .

# pull model and dataset
printf "\n============= STEP-3: Pull dvc data =============\n"
pip install dvc[s3]
dvc pull $data_dir/models/llama2/Llama-2-70b-chat-hf/model-0000*.dvc --force
dvc pull $data_dir/models/llama2/Llama-2-70b-chat-hf/model-0001*.dvc --force
dvc pull $data_dir/models/llama2/Llama-2-70b-chat-hf/pytorch_model-0000*.dvc --force
dvc pull $data_dir/models/llama2/Llama-2-70b-chat-hf/pytorch_model-0001*.dvc --force
shopt -s extglob  # required: the extended glob pattern on the next line is a syntax error without it
dvc pull $data_dir/models/llama2/Llama-2-70b-chat-hf/!(model-000*|pytorch_model-000*).dvc --force
dvc pull $data_dir/dataset/open-orca/validation --force
dvc pull $data_dir/dataset/open-orca/calibration --force
dvc pull $data_dir/quantization/llama2.dvc --force
# printf "\n============= End of build =============\n"

# exit from conda env.
conda deactivate

# get back to git root
cd $git_dir
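
Usage note (a sketch, not part of the committed diff): the script resolves every path through git rev-parse, so it can be launched from any directory inside the checkout. Assuming conda is installed with $CONDA_EXE set and credentials for the dvc S3 remote are available in the environment, a minimal invocation would be:

# run from anywhere inside the repository clone
bash scripts/build_qllama2-70b_env.sh
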
110 changes: 110 additions & 0 deletions scripts/envs/qllama2-70b_env.yml
@@ -0,0 +1,110 @@
name: mlperf-qllama2-70b
channels:
- defaults
dependencies:
- python~=3.10.0
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu118
- torch==2.1.0+cu118
- absl-py==2.1.0
- accelerate==0.28.0
- aiofiles==23.2.1
- aiohttp==3.8.6
- aiosignal==1.3.1
- async-timeout==4.0.3
- attrs==23.2.0
- boto3==1.34.67
- botocore==1.34.67
- certifi==2024.2.2
- charset-normalizer==3.3.2
- click==8.1.7
- cmake==3.28.3
- coloredlogs==15.0.1
- datasets==2.18.0
- dill==0.3.8
- evaluate==0.4.1
- filelock==3.13.1
- flatbuffers==24.3.7
- frozenlist==1.4.1
- fsspec==2024.2.0
- furiosa-common==0.10.1
- git+https://github.com/furiosa-ai/[email protected]
- furiosa-optimizer==0.10.0
- graphviz==0.20.3
- huggingface-hub==0.21.4
- humanfriendly==10.0
- idna==3.6
- Jinja2==3.1.3
- jmespath==1.0.1
- joblib==1.3.2
- lit==18.1.1
- markdown-it-py==3.0.0
- MarkupSafe==2.1.5
- mdurl==0.1.2
- git+https://github.com/furiosa-ai/[email protected]
- mpmath==1.3.0
- multidict==6.0.5
- multipledispatch==1.0.0
- multiprocess==0.70.16
- networkx==3.2.1
- nltk==3.8.1
- numpy==1.26.4
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
- nvidia-cuda-runtime-cu12==12.1.105
- nvidia-cudnn-cu12==8.9.2.26
- nvidia-cufft-cu12==11.0.2.54
- nvidia-curand-cu12==10.3.2.106
- nvidia-cusolver-cu12==11.4.5.107
- nvidia-cusparse-cu12==12.1.0.106
- nvidia-nccl-cu12==2.20.5
- nvidia-nvjitlink-cu12==12.5.40
- nvidia-nvtx-cu12==12.1.105
- onnx==1.14.1
- onnx-simplifier==0.4.36
- onnxoptimizer==0.3.13
- onnxruntime==1.15.1
- packaging==24.0
- pandas==2.2.1
- pillow==10.2.0
- protobuf==5.26.0
- psutil==5.9.8
- pyarrow==15.0.2
- pyarrow-hotfix==0.6
- pybind11==2.11.1
- pydot==2.0.0
- Pygments==2.17.2
- pyparsing==3.1.2
- python-dateutil==2.9.0.post0
- pytz==2024.1
- PyYAML==6.0.1
- regex==2023.12.25
- requests==2.31.0
- responses==0.18.0
- rich==13.7.1
- rouge_score==0.1.2
- ruamel.yaml==0.18.6
- ruamel.yaml.clib==0.2.8
- ruff==0.4.9
- s3transfer==0.10.1
- safetensors==0.4.2
- scipy==1.10.1
- sentencepiece==0.1.99
- simplejson==3.19.2
- six==1.16.0
- sympy==1.12
- timm==0.6.11
- tokenizers==0.13.3
- torch==2.1.0+cu118
- torchaudio==2.1.0+cu118
- torchvision==0.16.0+cu118
- tqdm==4.65.0
- transformers==4.31.0
- triton==2.1.0
- typing==3.7.4.3
- typing_extensions==4.10.0
- tzdata==2024.1
- urllib3==2.2.1
- xxhash==3.4.1
- yarl==1.9.4
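
Usage sketch (an assumption; the build script in this commit only creates the quant variant below): the base environment can presumably be created the same way, e.g.

conda env create -f scripts/envs/qllama2-70b_env.yml   # base (non-quant) env
conda activate mlperf-qllama2-70b                      # env name comes from the yml above
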
110 changes: 110 additions & 0 deletions scripts/envs/qllama2-70b_env_quant.yml
@@ -0,0 +1,110 @@
name: mlperf-qllama2-70b
channels:
- defaults
dependencies:
- python~=3.10.0
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu118
- torch==2.1.0+cu118
- absl-py==2.1.0
- git+https://github.com/furiosa-ai/accelerate-compression.git@4d7b404041834d35727064e5b1dcfcd060319ad6#egg=accelerate
- aiofiles==23.2.1
- aiohttp==3.8.6
- aiosignal==1.3.1
- async-timeout==4.0.3
- attrs==23.2.0
- boto3==1.34.67
- botocore==1.34.67
- certifi==2024.2.2
- charset-normalizer==3.3.2
- click==8.1.7
- cmake==3.28.3
- coloredlogs==15.0.1
- datasets==2.18.0
- dill==0.3.8
- evaluate==0.4.1
- filelock==3.13.1
- flatbuffers==24.3.7
- frozenlist==1.4.1
- fsspec==2024.2.0
- furiosa-common==0.10.1
- git+https://github.com/furiosa-ai/[email protected]
- furiosa-optimizer==0.10.0
- graphviz==0.20.3
- huggingface-hub==0.21.4
- humanfriendly==10.0
- idna==3.6
- Jinja2==3.1.3
- jmespath==1.0.1
- joblib==1.3.2
- lit==18.1.1
- markdown-it-py==3.0.0
- MarkupSafe==2.1.5
- mdurl==0.1.2
- git+https://github.com/furiosa-ai/[email protected]
- mpmath==1.3.0
- multidict==6.0.5
- multipledispatch==1.0.0
- multiprocess==0.70.16
- networkx==3.2.1
- nltk==3.8.1
- numpy==1.26.4
- nvidia-cublas-cu12==12.1.3.1
- nvidia-cuda-cupti-cu12==12.1.105
- nvidia-cuda-nvrtc-cu12==12.1.105
- nvidia-cuda-runtime-cu12==12.1.105
- nvidia-cudnn-cu12==8.9.2.26
- nvidia-cufft-cu12==11.0.2.54
- nvidia-curand-cu12==10.3.2.106
- nvidia-cusolver-cu12==11.4.5.107
- nvidia-cusparse-cu12==12.1.0.106
- nvidia-nccl-cu12==2.20.5
- nvidia-nvjitlink-cu12==12.5.40
- nvidia-nvtx-cu12==12.1.105
- onnx==1.14.1
- onnx-simplifier==0.4.36
- onnxoptimizer==0.3.13
- onnxruntime==1.15.1
- packaging==24.0
- pandas==2.2.1
- pillow==10.2.0
- protobuf==5.26.0
- psutil==5.9.8
- pyarrow==15.0.2
- pyarrow-hotfix==0.6
- pybind11==2.11.1
- pydot==2.0.0
- Pygments==2.17.2
- pyparsing==3.1.2
- python-dateutil==2.9.0.post0
- pytz==2024.1
- PyYAML==6.0.1
- regex==2023.12.25
- requests==2.31.0
- responses==0.18.0
- rich==13.7.1
- rouge_score==0.1.2
- ruamel.yaml==0.18.6
- ruamel.yaml.clib==0.2.8
- ruff==0.4.9
- s3transfer==0.10.1
- safetensors==0.4.2
- scipy==1.10.1
- sentencepiece==0.1.99
- simplejson==3.19.2
- six==1.16.0
- sympy==1.12
- timm==0.6.11
- tokenizers==0.13.3
- torch==2.1.0+cu118
- torchaudio==2.1.0+cu118
- torchvision==0.16.0+cu118
- tqdm==4.65.0
- git+https://github.com/furiosa-ai/transformers-comp.git@2b012fcf15006e2cb2b0d9735ebf5b1d08a744a8#egg=transformers
- triton==2.1.0
- typing==3.7.4.3
- typing_extensions==4.10.0
- tzdata==2024.1
- urllib3==2.2.1
- xxhash==3.4.1
- yarl==1.9.4
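
The quant environment is identical to the base one except that accelerate and transformers are swapped for pinned furiosa-ai forks (accelerate-compression and transformers-comp). A quick way to confirm the two files differ only in those pins:

diff scripts/envs/qllama2-70b_env.yml scripts/envs/qllama2-70b_env_quant.yml
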
97 changes: 97 additions & 0 deletions scripts/eval_qllama2-70b_golden.sh
@@ -0,0 +1,97 @@
#!/bin/bash

# define env. variables
model_name=qllama2-70b
model_dir=language/llama2-70b
git_dir=$(git rev-parse --show-toplevel)
work_dir=$git_dir/$model_dir
data_dir=$git_dir/data
quant_data_dir=$data_dir/quantization/llama2
log_dir=$git_dir/logs
env_name=mlperf-$model_name
conda_base=$($CONDA_EXE info --base)

# work on model directory
cd $work_dir

# enter existing conda env.
source "$conda_base/etc/profile.d/conda.sh"
conda activate $env_name

# eval model
printf "\n============= STEP-4: Run eval =============\n"
SCENARIO=${SCENARIO:="Offline"}
DATA_TYPE=${DATA_TYPE:="quant"}
N_COUNT=${N_COUNT:="24576"} # total_len = 24,576
DEVICE=${DEVICE:="cuda:0"}

if [ "$DEVICE" = "cpu" ]; then
    DATA_TYPE=float32
fi

# quantization args
QUANT_CONFIG_PATH=$quant_data_dir/quant_config.yaml
QUANT_PARAM_PATH=$quant_data_dir/quant_param_golden.npy
QUANT_FORMAT_PATH=$quant_data_dir/quant_format_golden.yaml
MODEL_SOURCE=furiosa_llm_rope
QUANT_DATATYPE=W8A8KV8


printf "<<EVAL_CONFIG>>\n"
printf "\tSCENARIO: $SCENARIO\n"
printf "\tDATA_TYPE: $DATA_TYPE\n"
printf "\tNUM_DATA: $N_COUNT\n"
printf "\tDEVICE: $DEVICE\n"

if (( N_COUNT < 2000 )); then
    USER_CONF=$git_dir/internal_test.conf
else
    USER_CONF=user.conf
fi

CHECKPOINT_PATH=$data_dir/models/llama2/Llama-2-70b-chat-hf
DATASET_PATH=$data_dir/dataset/open-orca/validation/open_orca_gpt4_tokenized_llama.sampled_24576.pkl
LOG_PATH=$log_dir/$model_name/$SCENARIO/$QUANT_DATATYPE/$(date +%Y%m%d_%H%M%S%Z)

SECONDS=0
python -u main.py --scenario $SCENARIO \
--model-path $CHECKPOINT_PATH \
--mlperf-conf ../../mlperf.conf \
--user-conf $USER_CONF \
--total-sample-count $N_COUNT \
--device $DEVICE \
--dataset-path $DATASET_PATH \
--dtype $DATA_TYPE \
--accuracy \
--output-log-dir $LOG_PATH \
--quantize \
--quant_config_path $QUANT_CONFIG_PATH \
--quant_param_path $QUANT_PARAM_PATH \
--quant_format_path $QUANT_FORMAT_PATH \
--model_source $MODEL_SOURCE

duration=$SECONDS
printf "$((duration / 60)) minutes and $((duration % 60)) seconds elapsed." &> $LOG_PATH/elapsed_time.log

ACCURACY_LOG_FILE=$LOG_PATH/mlperf_log_accuracy.json
python evaluate-accuracy.py --checkpoint-path $CHECKPOINT_PATH \
--mlperf-accuracy-file $ACCURACY_LOG_FILE \
--dataset-file $DATASET_PATH --dtype int64 \
&> $LOG_PATH/accuracy_result.log
printf "Save eval log to $LOG_PATH"

printf "\n============= End of eval =============\n"

# unset exported env. variables
unset SCENARIO
unset DATA_TYPE
unset N_COUNT
unset DEVICE

# exit from conda env.
conda deactivate

# get back to git root
cd $git_dir
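
Usage sketch: the script reads SCENARIO, DATA_TYPE, N_COUNT, and DEVICE from the environment and falls back to the defaults above; with N_COUNT below 2000 it switches from user.conf to internal_test.conf. A small smoke-test run might therefore look like:

# hypothetical invocation; 1000 samples triggers internal_test.conf
N_COUNT=1000 DEVICE=cuda:0 bash scripts/eval_qllama2-70b_golden.sh
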