Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.0.2 release #1165

Merged
merged 18 commits into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: MLPerf inference MIL C++ ResNet50
name: MLPerf inference MLCommons C++ ResNet50

on:
pull_request:
branches: [ "master", "dev" ]
paths:
- '.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml'
- '.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml'
- 'cm-mlops/**'
- '!cm-mlops/**.md'

Expand All @@ -33,6 +33,6 @@ jobs:
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
cm run script --quiet --tags=get,sys-utils-cm
cm run script --quiet --tags=install,prebuilt,llvm --version=${{ matrix.llvm-version }}
- name: Test MLPerf Inference MIL C++ ResNet50
- name: Test MLPerf Inference MLCommons C++ ResNet50
run: |
cmr "app mlperf inference cpp" -v --quiet
cmr "app mlperf inference mlcommons cpp" -v --quiet
4 changes: 2 additions & 2 deletions cm-mlops/script/app-mlperf-inference-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -440,13 +440,13 @@ variations:

gptj_,build:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: get,cmake
version_min: "3.25.0"

gptj_,build_engine:
deps:
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
- tags: get,cmake
version_min: "3.25.0"

Expand Down
3 changes: 3 additions & 0 deletions cm-mlops/script/app-mlperf-inference-nvidia/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ def preprocess(i):
return {'return':0}

elif make_command == "preprocess_data":
if env['CM_MODEL'] == "rnnt":
cmds.append(f"rm -rf {os.path.join(env['MLPERF_SCRATCH_PATH'], 'preprocessed_data', 'rnnt_dev_clean_500_raw')}")
cmds.append(f"rm -rf {os.path.join(env['MLPERF_SCRATCH_PATH'], 'preprocessed_data', 'rnnt_train_clean_512_wav')}")
cmds.append(f"make preprocess_data BENCHMARKS='{model_name}'")

else:
Expand Down
26 changes: 24 additions & 2 deletions cm-mlops/script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,18 @@ variations:

nvidia-original:
docker:
interactive: True
extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public
docker:os_version: "20.04"
deps:
- tags: get,mlperf,inference,nvidia,scratch,space
- tags: get,nvidia-docker
mounts:
- "${{ CM_CUDNN_TAR_FILE_PATH }}:${{ CM_CUDNN_TAR_FILE_PATH }}"
- "${{ CM_TENSORRT_TAR_FILE_PATH }}:${{ CM_TENSORRT_TAR_FILE_PATH }}"
- "${{ CUDA_RUN_FILE_LOCAL_PATH }}:${{ CUDA_RUN_FILE_LOCAL_PATH }}"
- "${{ MLPERF_SCRATCH_PATH }}:${{ MLPERF_SCRATCH_PATH }}"
default_variations:
backend: tensorrt
device: cuda
Expand Down Expand Up @@ -271,7 +280,7 @@ variations:
- nvidia-original-mlperf-inference
- nvidia-harness
- mlperf-inference-implementation
tags: reproduce,mlperf,nvidia,inference
tags: reproduce,mlperf,nvidia,inference,_run_harness
skip_if_env:
CM_SKIP_RUN:
- yes
Expand Down Expand Up @@ -301,6 +310,8 @@ variations:
- yes
env:
CM_MLPERF_IMPLEMENTATION: intel
docker:
interactive: True

qualcomm:
alias: kilt
Expand All @@ -322,6 +333,8 @@ variations:
- yes
env:
CM_MLPERF_IMPLEMENTATION: qualcomm
docker:
interactive: True

kilt,qualcomm,resnet50:
default_variations:
Expand Down Expand Up @@ -1161,12 +1174,21 @@ gui:
title: "CM GUI for the MLPerf inference benchmark"

docker:
deps:
- tags: get,mlperf,inference,results,dir
- tags: get,mlperf,inference,submission,dir
mounts:
- "${{ IMAGENET_PATH }}:/data/imagenet-val"
- "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
- "${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}:${{ CM_MLPERF_INFERENCE_SUBMISSION_DIR }}"
- "${{ RESULTS_DIR }}:/home/cmuser/results_dir"
- "${{ SUBMISSION_DIR }}:/home/cmuser/submission_dir"
- "${{ DLRM_DATA_PATH }}:/home/mlperf_inf_dlrmv2"
skip_run_cmd: 'no'
shm_size: '32gb'
extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
docker_os: ubuntu
docker_real_run: False
interactive: True
docker_os_version: '22.04'
docker_input_mapping:
imagenet_path: IMAGENET_PATH
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/app-mlperf-inference/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def preprocess(i):
env = i['env']
state = i['state']

if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'nvidia-original':
if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'nvidia':
if env.get('CM_NVIDIA_GPU_NAME', '') in [ "rtx_4090", "a100", "t4", "l4", "orin", "custom" ]:
env['CM_NVIDIA_HARNESS_GPU_VARIATION'] = "_" + env['CM_NVIDIA_GPU_NAME']
env['CM_NVIDIA_GPU_MEMORY'] = ''
Expand Down
11 changes: 8 additions & 3 deletions cm-mlops/script/build-mlperf-inference-server-nvidia/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,14 @@ versions:
tags: _version.4_0
deps:
- tags: install,nccl,libs,_cuda
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1-gptj
- tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1
names:
- pytorch
- torch
- tags: install,torchvision,from.src,_for-nvidia-mlperf-inference-v3.1
names:
- pytorchvision
- torchvision

docker:
skip_run_cmd: 'no'
Expand Down Expand Up @@ -235,8 +240,8 @@ docker:
- tags: get,mlperf,inference,submission,dir
- tags: get,nvidia-docker
pre_run_cmds:
- cm pull repo mlcommons@ck
run_cmd_prefix: sudo apt remove -y cmake && cm pull repo mlcommons@ck
- cm pull repo
run_cmd_prefix: sudo apt remove -y cmake
mounts:
- "${{ IMAGENET_PATH }}:/data/imagenet-val"
- "${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}:${{ CM_MLPERF_INFERENCE_RESULTS_DIR }}"
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/get-generic-python-lib/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -883,7 +883,7 @@
}
],
"default_env": {
"CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS": "torch"
"CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS1": "torch"
},
"env": {
"CM_GENERIC_PYTHON_PACKAGE_NAME": "torch",
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/get-generic-python-lib/install.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

if [[ ${CM_GENERIC_PYTHON_PACKAGE_VARIANT} == "nvidia-apex" ]]; then
if [[ ${CM_GENERIC_PYTHON_PACKAGE_VARIANT} == "nvidia-apex-depreciated" ]]; then
cd ${CM_GIT_REPO_CHECKOUT_PATH}
cmd="${CM_PYTHON_BIN_WITH_PATH} -m pip install -v --disable-pip-version-check --global-option=\"--cpp_ext\" --global-option=\"--cuda_ext\" ./"
echo $cmd
Expand Down
2 changes: 1 addition & 1 deletion cm-mlops/script/install-pytorch-from-src/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@
}
]
},
"for-nvidia-mlperf-inference-v3.1-gptj": {
"for-nvidia-mlperf-inference-v3.1": {
"base": [
"sha.b5021ba9",
"cuda"
Expand Down
25 changes: 15 additions & 10 deletions cm-mlops/script/install-pytorch-from-src/run.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
#!/bin/bash

CUR_DIR=$PWD
rm -rf pytorch
cp -r ${CM_PYTORCH_SRC_REPO_PATH} pytorch
cd pytorch
git submodule sync
git submodule update --init --recursive
rm -rf build
if [[ ! -e pytorch/dist/torch*.whl ]]; then
rm -rf pytorch
cp -r ${CM_PYTORCH_SRC_REPO_PATH} pytorch
cd pytorch
git submodule sync
git submodule update --init --recursive
rm -rf build

${CM_PYTHON_BIN_WITH_PATH} -m pip install -r requirements.txt
test $? -eq 0 || exit $?
${CM_PYTHON_BIN_WITH_PATH} setup.py bdist_wheel
test $? -eq 0 || exit $?
else
cd pytorch
fi

${CM_PYTHON_BIN_WITH_PATH} -m pip install -r requirements.txt
if [ "${?}" != "0" ]; then exit $?; fi
${CM_PYTHON_BIN_WITH_PATH} setup.py bdist_wheel
test $? -eq 0 || exit $?
cd dist
${CM_PYTHON_BIN_WITH_PATH} -m pip install torch-2.*linux_x86_64.whl
test $? -eq 0 || exit $?
140 changes: 140 additions & 0 deletions cm-mlops/script/install-torchvision-from-src/_cm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
{
"alias": "install-torchvision-from-src",
"automation_alias": "script",
"automation_uid": "5b4e0237da074764",
"cache": true,
"category": "Compiler automation",
"deps": [
{
"tags": "detect,os"
},
{
"tags": "detect,cpu"
},
{
"names": [
"python",
"python3"
],
"skip_if_env": {
"CM_CONDA_ENV": [
"yes"
]
},
"tags": "get,python3"
},
{
"env": {
"CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_PYTORCH_VISION_SRC_REPO_PATH"
},
"extra_cache_tags": "pytorchvision,torchvision,torchvision-src,src,pytorchvision-src,pytorchvision-src-repo",
"names": [
"pytorchision-src-repo",
"torchision-src-repo"
],
"tags": "get,git,repo",
"update_tags_from_env_with_prefix": {
"_branch.": [
"CM_GIT_CHECKOUT"
],
"_repo.": [
"CM_GIT_URL"
],
"_sha.": [
"CM_GIT_CHECKOUT_SHA"
],
"_tag.": [
"CM_GIT_CHECKOUT_TAG"
]
}
}
],
"env": {
"CM_GIT_URL": "https://github.com/pytorch/vision"
},
"name": "Build pytorchvision from sources",
"new_env_keys": [
"CM_PYTORCHVISION_*"
],
"prehook_deps": [],
"sort": 1000,
"tags": [
"install",
"get",
"src",
"from.src",
"pytorchvision",
"torchvision",
"src-pytorchvision"
],
"uid": "68b855780d474546",
"variations": {
"branch.#": {
"env": {
"CM_GIT_CHECKOUT": "#"
}
},
"cuda": {
"deps": [
{
"names": [
"cuda"
],
"tags": "get,cuda,_cudnn"
}
],
"env": {
"CUDA_HOME": "<<<CM_CUDA_INSTALLED_PATH>>>",
"CUDA_NVCC_EXECUTABLE": "<<<CM_NVCC_BIN_WITH_PATH>>>",
"CUDNN_INCLUDE_PATH": "<<<CM_CUDA_PATH_INCLUDE_CUDNN>>>",
"CUDNN_LIBRARY_PATH": "<<<CM_CUDA_PATH_LIB_CUDNN>>>",
"USE_CUDA": "1",
"USE_CUDNN": "1",
"TORCH_CUDA_ARCH_LIST": "Ampere Ada Hopper",
"TORCH_CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=1"
}
},
"for-nvidia-mlperf-inference-v3.1": {
"base": [
"sha.657027f3",
"cuda"
],
"deps": [
{
"tags": "install,pytorch,from.src,_for-nvidia-mlperf-inference-v3.1"
}
],
"env": {
}
},
"repo.#": {
"env": {
"CM_GIT_URL": "#"
},
"group": "repo"
},
"repo.https://github.com/pytorch/vision": {
"default": true,
"env": {
"CM_GIT_URL": "https://github.com/pytorch/vision"
},
"group": "repo"
},
"sha.#": {
"env": {
"CM_GIT_CHECKOUT_SHA": "#"
}
},
"tag.#": {
"ad": {
"pytorch-src-repo": {
"tags": "_no-recurse-submodules,_full-history"
}
},
"env": {
"CM_GIT_CHECKOUT_TAG": "#"
}
}
},
"versions": {}
}
26 changes: 26 additions & 0 deletions cm-mlops/script/install-torchvision-from-src/customize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from cmind import utils
import os

def preprocess(i):
    """CM preprocess hook for the install-torchvision-from-src script.

    Expects ``i`` to carry:
      - ``os_info``: dict with at least a ``platform`` key
      - ``env``: the CM environment dict (mutated in place)
      - ``run_script_input``: dict whose ``script_name`` selects the run script

    Returns ``{'return': 0}`` on success, or ``{'return': 1, 'error': ...}``
    when the platform is unsupported (Windows).
    """
    os_info = i['os_info']

    # Windows builds are not wired up for this script yet.
    if os_info['platform'] == 'windows':
        return {'return': 1, 'error': 'Windows is not supported in this script yet'}

    env = i['env']

    # Intel MLPerf inference uses its own build recipe: switch to the Intel
    # run script and install torchvision in editable mode with clang, CUDA off.
    # NOTE(review): indentation was lost in the source capture; the Intel-only
    # scoping of CM_RUN_CMD below matches the upstream layout — confirm.
    if env.get('CM_MLPERF_INFERENCE_INTEL', '') == "yes":
        i['run_script_input']['script_name'] = "run-intel-mlperf-inference-v3_1"
        env['CM_RUN_CMD'] = "CC=clang CXX=clang++ USE_CUDA=OFF python -m pip install -e . "

    return {'return': 0}

def postprocess(i):
    """No-op postprocess hook; always reports success to the CM runner."""
    result = {'return': 0}
    return result
14 changes: 14 additions & 0 deletions cm-mlops/script/install-torchvision-from-src/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# Build a torchvision wheel from the source checkout fetched by the
# get-git-repo dependency (CM_PYTORCH_VISION_SRC_REPO_PATH) and install it.
# Exits non-zero as soon as any build/install step fails.

CUR_DIR=$PWD

# Work on a private copy so the cached source checkout stays pristine.
rm -rf pytorchvision
cp -r ${CM_PYTORCH_VISION_SRC_REPO_PATH} pytorchvision
cd pytorchvision
test "${?}" -eq "0" || exit $?
rm -rf build

# Use one interpreter for both build and install so the wheel is built for
# the same Python it is installed into (the original mixed `python` for the
# build with `python3` for the install, which can diverge). Prefer the
# CM-detected interpreter, as install-pytorch-from-src/run.sh does.
PYTHON_BIN="${CM_PYTHON_BIN_WITH_PATH:-python3}"

${PYTHON_BIN} setup.py bdist_wheel
test "${?}" -eq "0" || exit $?
cd dist
${PYTHON_BIN} -m pip install torchvision*linux_x86_64.whl
test "${?}" -eq "0" || exit $?
Loading
Loading