
Commit 75e67a3
Merge branch 'master' of https://github.com/ctuning/mlcommons-ck
gfursin committed Oct 23, 2023
2 parents bbd581a + 7354452 commit 75e67a3
Showing 7 changed files with 322 additions and 0 deletions.
13 changes: 13 additions & 0 deletions cm-mlops/script/get-generic-python-lib/_cm.json
@@ -481,6 +481,19 @@
"CM_POLYGRAPHY_VERSION"
]
},
"onnx-graphsurgeon": {
"deps": [
{
"tags": "get,generic-python-lib,_package.nvidia-pyindex"
}
],
"env": {
"CM_GENERIC_PYTHON_PACKAGE_NAME": "onnx_graphsurgeon"
},
"new_env_keys": [
"CM_ONNX_GRAPHSURGEON_VERSION"
]
},
"protobuf": {
"env": {
"CM_GENERIC_PYTHON_PACKAGE_NAME": "protobuf"
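The new onnx-graphsurgeon variation can be exercised on its own; a minimal usage sketch, assuming a working CM setup (the pinned version in the second command is illustrative and optional):

    cm run script --tags=get,generic-python-lib,_onnx-graphsurgeon
    cm run script --tags=get,generic-python-lib,_onnx-graphsurgeon --version=0.3.26

The extra dependency on _package.nvidia-pyindex is presumably there because onnx-graphsurgeon has historically been distributed through NVIDIA's pip index.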
129 changes: 129 additions & 0 deletions cm-mlops/script/get-ml-model-bert-large-squad/_cm.json
@@ -34,6 +34,11 @@
      },
      "update_tags_from_env_with_prefix": {
        "_url.": [ "CM_PACKAGE_URL" ]
      },
      "force_cache": true,
      "extra_cache_tags": "bert-large,ml-model",
      "skip_if_env": {
        "CM_ML_MODEL_BERT_PACKED": [ "yes" ]
      }
    }
  ],
@@ -211,6 +216,130 @@
"env": {
"CM_PACKAGE_URL": "https://github.com/mlcommons/inference_results_v2.1/raw/master/open/NeuralMagic/code/bert/deepsparse/models/oBERT-Large_95sparse_block4_qat.onnx.tar.xz"
}
},
"unpacked": {
"group": "packing",
"default": true,
"env": {
"CM_ML_MODEL_BERT_PACKED": "no"
}
},
"packed": {
"group": "packing",
"env": {
"CM_ML_MODEL_BERT_PACKED": "yes"
},
"deps": [
{
"tags": "get,python3",
"names": [ "python", "python3" ]
},
{
"tags": "get,generic-python-lib,_torch",
"names": [ "torch", "pytorch" ]
},
{
"tags": "get,generic-python-lib,_package.tensorflow",
"names": [ "tensorflow" ]
},
{
"tags": "get,generic-python-lib,_package.setuptools_rust",
"names": [ "setuptools_rust" ]
},
{
"tags": "get,generic-python-lib,_package.transformers",
"names": [ "transformers" ],
"version": "2.4.0"
},
{
"tags": "get,generic-python-lib,_package.protobuf",
"names": [ "protobuf" ],
"version1": "3.20.3"
},
{
"tags": "get,generic-python-lib,_package.onnx",
"names": [ "onnx" ],
"version1": "1.12.0"
},
{
"tags": "get,generic-python-lib,_onnx-graphsurgeon",
"names": [ "onnx-graphsurgeon" ],
"version1": "0.3.26"
},
{
"tags": "get,generic-python-lib,_package.onnx-simplifier",
"names": [ "onnx-simplifier" ],
"version1": "0.3.7"
},
{
"tags": "get,generic-python-lib,_numpy",
"names": [ "numpy" ],
"version1": "1.23.0"
},
{
"tags": "get,mlperf,inference,src",
"names": [ "inference-src" ]
}
],
"prehook_deps": [
{
"tags": "download,file,_wget,_url.https://zenodo.org/record/3733868/files/model.ckpt-5474.data-00000-of-00001",
"env": {
"CM_DOWNLOAD_FILENAME": "model.ckpt-5474.data-00000-of-00001",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_BERT_CHECKPOINT_DATA_PATH",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>",
"CM_DOWNLOAD_CHECKSUM": "3089b27c559906a868878741d992ade7"
},
"force_cache": true,
"extra_cache_tags": "bert,checkpoint,weights,bert-large"
},
{
"tags": "download,file,_wget,_url.https://zenodo.org/record/3733868/files/model.ckpt-5474.index",
"env": {
"CM_DOWNLOAD_FILENAME": "model.ckpt-5474.index",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_BERT_CHECKPOINT_INDEX_PATH",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>",
"CM_DOWNLOAD_CHECKSUM": "d23d61572d9404da4dac3363b5bc735b"
},
"force_cache": true,
"extra_cache_tags": "bert,checkpoint-index,bert-large"
},
{
"tags": "download,file,_wget,_url.https://zenodo.org/record/3733868/files/model.ckpt-5474.meta",
"env": {
"CM_DOWNLOAD_FILENAME": "model.ckpt-5474.meta",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_BERT_CHECKPOINT_META_PATH",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>",
"CM_DOWNLOAD_CHECKSUM": "83e11e57eea14c9e9a246af74af40d66"
},
"force_cache": true,
"extra_cache_tags": "bert,checkpoint-meta,bert-large"
},
{
"tags": "download,file,_wget,_url.https://zenodo.org/record/3733868/files/vocab.txt",
"env": {
"CM_DOWNLOAD_FILENAME": "vocab.txt",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_BERT_VOCAB_PATH",
"CM_DOWNLOAD_PATH": "<<<CM_BERT_CHECKPOINT_DOWNLOAD_DIR>>>",
"CM_DOWNLOAD_CHECKSUM": "64800d5d8528ce344256daf115d4965e"
},
"force_cache": true,
"extra_cache_tags": "bert,vocab,bert-large"
},
{
"tags": "download,file,_wget,_url.https://raw.githubusercontent.com/krai/axs2kilt/main/model_onnx_bert_large_packed_recipe/convert_model.py",
"env": {
"CM_DOWNLOAD_FILENAME": "convert_model.py",
"CM_DOWNLOAD_FINAL_ENV_NAME": "CM_BERT_CONVERTER_CODE_PATH",
"CM_DOWNLOAD_CHECKSUM": "94c91ce422e8f36f9d98b4926e2ad688"
},
"force_cache": true,
"extra_cache_tags": "bert,checkpoint,converter,code,bert-large"
}
],
"new_env_keys": [
"CM_BERT_"
]
}
}
}
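A sketch of how the new packing group could be selected when fetching the model; the tag list here is illustrative, check the script's full tag set in its _cm.json:

    cm run script --tags=get,ml-model,bert-large,squad,_packed

With _packed selected, the deps above fetch the TensorFlow checkpoint, vocab and converter script, and the script emits model.onnx in its working directory (see customize.py and run-packed.sh below); without it, the default _unpacked behaviour is unchanged.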
7 changes: 7 additions & 0 deletions cm-mlops/script/get-ml-model-bert-large-squad/customize.py
@@ -6,6 +6,13 @@ def preprocess(i):
    os_info = i['os_info']

    env = i['env']
    if env.get('CM_ML_MODEL_BERT_PACKED', '') == 'yes':
        # Packed model: use the dedicated run script and set paths for the
        # downloaded checkpoint and the packed ONNX output
        i['run_script_input']['script_name'] = "run-packed"
        env['CM_BERT_CONFIG_PATH'] = os.path.join(env['CM_MLPERF_INFERENCE_BERT_PATH'], "bert_config.json")
        env['CM_BERT_CHECKPOINT_DOWNLOAD_DIR'] = os.path.join(os.getcwd(), "downloaded")
        env['CM_ML_MODEL_FILE_WITH_PATH'] = os.path.join(os.getcwd(), "model.onnx")
        env['CM_ML_MODEL_BERT_PACKED_PATH'] = os.path.join(os.getcwd(), "model.onnx")

    return {'return':0}

def postprocess(i):
6 changes: 6 additions & 0 deletions cm-mlops/script/get-ml-model-bert-large-squad/run-packed.sh
@@ -0,0 +1,6 @@
#!/bin/bash

cmd="${CM_PYTHON_BIN_WITH_PATH} ${CM_BERT_CONVERTER_CODE_PATH} --src '${PWD}/downloaded/model.ckpt-5474' --dest '$PWD/' --config_path '${CM_BERT_CONFIG_PATH}'"
echo $cmd
eval $cmd
test $? -eq 0 || exit $?
82 changes: 82 additions & 0 deletions cm-mlops/script/get-preprocessed-dataset-squad/_cm.yaml
@@ -0,0 +1,82 @@
alias: get-preprocessed-dataset-squad
automation_alias: script
automation_uid: 5b4e0237da074764
cache: true
deps:
  - tags: get,python3
    names:
      - python
      - python3
  - tags: get,mlperf,inference,src
    names:
      - inference-src
  - tags: get,squad,dataset,original
    names:
      - squad-dataset
  - tags: get,squad,vocab
    names:
      - squad-vocab
  - tags: get,generic-python-lib,_package.tokenization

docker_input_mapping: {}
input_description: {}
env:
  CM_DATASET_MAX_QUERY_LENGTH: 64

input_mapping: {}
new_env_keys:
  - CM_DATASET_SQUAD_TOKENIZED_*
new_state_keys: []
post_deps: []
posthook_deps: []
prehook_deps: []
tags:
  - get
  - dataset
  - preprocessed
  - tokenized
  - squad
uid: 7cd1d9b7e8af4788
variations:
  calib1:
    group: calibration-set
    env:
      CM_SQUAD_CALIBRATION_SET: one
  calib2:
    group: calibration-set
    env:
      CM_SQUAD_CALIBRATION_SET: two
  no-calib:
    group: calibration-set
    default: true
    env:
      CM_SQUAD_CALIBRATION_SET: ''   # no calibration subset selected
  raw:
    group: raw
    default: true
    env:
      CM_DATASET_RAW: "yes"
  pickle:
    group: raw
    env:
      CM_DATASET_RAW: "no"
  seq-length.#:
    group: seq-length
    env:
      CM_DATASET_MAX_SEQ_LENGTH: "#"
  seq-length.384:
    group: seq-length
    default: true
    env:
      CM_DATASET_MAX_SEQ_LENGTH: 384
  doc-stride.#:
    group: doc-stride
    env:
      CM_DATASET_DOC_STRIDE: "#"
  doc-stride.128:
    group: doc-stride
    default: true
    env:
      CM_DATASET_DOC_STRIDE: 128

versions: {}
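A usage sketch for the new preprocessing script, assuming a working CM setup; the defaults resolve to _raw, _no-calib, _seq-length.384 and _doc-stride.128:

    cm run script --tags=get,dataset,preprocessed,tokenized,squad
    cm run script --tags=get,dataset,preprocessed,tokenized,squad,_seq-length.512,_calib1

The second command overrides the wildcard seq-length.# variation and selects calibration set one; the values are illustrative.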
45 changes: 45 additions & 0 deletions cm-mlops/script/get-preprocessed-dataset-squad/customize.py
@@ -0,0 +1,45 @@
from cmind import utils
import os

def preprocess(i):

    os_info = i['os_info']

    env = i['env']

    meta = i['meta']

    automation = i['automation']

    quiet = (env.get('CM_QUIET', False) == 'yes')

    # Select the SQuAD calibration file shipped with the MLPerf inference sources
    if env.get('CM_SQUAD_CALIBRATION_SET') == "one":
        env['DATASET_CALIBRATION_FILE'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'calibration', 'SQuAD-v1.1', 'bert_calibration_features.txt')
        env['DATASET_CALIBRATION_ID'] = 1
    elif env.get('CM_SQUAD_CALIBRATION_SET') == "two":
        env['DATASET_CALIBRATION_FILE'] = os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], 'calibration', 'SQuAD-v1.1', 'bert_calibration_qas_ids.txt')
        env['DATASET_CALIBRATION_ID'] = 2
    else:
        env['DATASET_CALIBRATION_FILE'] = ""
        env['DATASET_CALIBRATION_ID'] = 0

    # Legacy CK-style variable pointing to the MLPerf inference sources
    env['CK_ENV_MLPERF_INFERENCE'] = env['CM_MLPERF_INFERENCE_SOURCE']

    return {'return':0}

def postprocess(i):

    env = i['env']
    cur = os.getcwd()

    env['CM_DATASET_SQUAD_TOKENIZED_ROOT'] = cur
    if env.get('CM_DATASET_RAW', '') == "yes":
        env['CM_DATASET_SQUAD_TOKENIZED_INPUT_IDS'] = os.path.join(cur, 'bert_tokenized_squad_v1_1_input_ids.raw')
        env['CM_DATASET_SQUAD_TOKENIZED_SEGMENT_IDS'] = os.path.join(cur, 'bert_tokenized_squad_v1_1_segment_ids.raw')
        env['CM_DATASET_SQUAD_TOKENIZED_INPUT_MASK'] = os.path.join(cur, 'bert_tokenized_squad_v1_1_input_mask.raw')

    env['CM_DATASET_SQUAD_TOKENIZED_MAX_SEQ_LENGTH'] = env['CM_DATASET_MAX_SEQ_LENGTH']
    env['CM_DATASET_SQUAD_TOKENIZED_DOC_STRIDE'] = env['CM_DATASET_DOC_STRIDE']
    env['CM_DATASET_SQUAD_TOKENIZED_MAX_QUERY_LENGTH'] = env['CM_DATASET_MAX_QUERY_LENGTH']

    return {'return':0}
40 changes: 40 additions & 0 deletions cm-mlops/script/get-preprocessed-dataset-squad/run.sh
@@ -0,0 +1,40 @@
#!/bin/bash

#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH}

#To export any variable
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency


function exit_if_error() {
    test $? -eq 0 || exit $?
}

function run() {
    echo "Running: "
    echo "$1"
    echo ""
    if [[ ${CM_FAKE_RUN} != 'yes' ]]; then
        eval "$1"
        exit_if_error
    fi
}

# Fetch the tokenization script and tokenize the SQuAD v1.1 validation set
CUR=$PWD
run "wget -nc https://raw.githubusercontent.com/krai/ck-mlperf/master/package/dataset-squad-tokenized_for_bert/tokenize_and_pack.py"

run "${CM_PYTHON_BIN_WITH_PATH} tokenize_and_pack.py \
  ${CM_DATASET_SQUAD_VAL_PATH} \
  ${CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH} \
  ${CUR}/bert_tokenized_squad_v1_1 \
  ${CM_DATASET_MAX_SEQ_LENGTH} \
  ${CM_DATASET_MAX_QUERY_LENGTH} \
  ${CM_DATASET_DOC_STRIDE} \
  ${CM_DATASET_RAW} \
  ${DATASET_CALIBRATION_FILE} \
  ${DATASET_CALIBRATION_ID}"
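Since the script is cached (cache: true above), the tokenized artifacts can later be located through the CM cache; a sketch, assuming the standard CM CLI:

    cm show cache --tags=get,dataset,preprocessed,squad

The CM_DATASET_SQUAD_TOKENIZED_* keys exported by customize.py then point at the generated .raw files and record the tokenization parameters.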
