Add turbine tank CI + validation distinction #125

Open · wants to merge 3 commits into base: main
71 changes: 71 additions & 0 deletions .github/workflows/turbine_tank.yml
@@ -0,0 +1,71 @@
# Copyright 2024 Advanced Micro Devices
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: Turbine Tank
on:
workflow_dispatch:
pull_request:

concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
# queued and in-progress runs for the same PR (presubmit) or commit
# (postsubmit). The workflow name is prepended to avoid conflicts between
# different workflows.
group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
cancel-in-progress: true

jobs:
turbine:
strategy:
matrix:
os: [nodai-amdgpu-w7900-x86-64]
runs-on: ${{matrix.os}}
steps:
- name: Checkout repo
uses: actions/checkout@v2
with:
repository: nod-ai/SHARK-Turbine
ref: tank-update
path: turbine

e2eshark:
needs: [turbine]
strategy:
matrix:
version: [3.11]
os: [nodai-amdgpu-w7900-x86-64]
runs-on: ${{matrix.os}}
env:
E2E_VENV_DIR: ${{ github.workspace }}/test-suite_venv
AZURE_CONNECTION_STRING: ${{ secrets.TANK_CONNECTION_STRING }}
AZURE_CONTAINER_NAME: "tankturbine"
TURBINE_TANK_ACTION: "upload"
steps:
- name: Checkout repo
uses: actions/checkout@v2
with:
repository: nod-ai/SHARK-TestSuite
path: test-suite

- name: Build
run: |
python -m venv ${E2E_VENV_DIR}
source ${E2E_VENV_DIR}/bin/activate
pip install --upgrade pip
pip install --index-url https://download.pytorch.org/whl/cpu \
-r ../turbine/core/pytorch-cpu-requirements.txt \
-r ../turbine/core/torchvision-requirements.txt
pip install --upgrade -r ../turbine/core/requirements.txt
pip install -e ../turbine/core[testing]
pip install -e ../turbine/models
working-directory: ./test-suite

- name: Run Turbine Tank
run: |
source ${E2E_VENV_DIR}/bin/activate
cd turbine_tank
python run_tank.py
working-directory: ./test-suite
7 changes: 6 additions & 1 deletion turbine_tank/flows_util.py
@@ -69,7 +69,7 @@ def main(self, inp=AbstractTensor(*input.shape, dtype=input.dtype)):
f.write(module_str)

model_name_upload = model_name.replace("/", "_")
turbine_tank.uploadToBlobStorage(
blob_name = turbine_tank.uploadToBlobStorage(
str(os.path.abspath(mlir_name)),
f"{model_name_upload}/{model_name_upload}-params.mlir",
)
@@ -95,5 +95,10 @@ def main(self, inp=AbstractTensor(*input.shape, dtype=input.dtype)):
os.remove(vmfb_name + ".vmfb")
os.remove(weight_name)

if err < expected_err:
new_blob_name = blob_name.split(".")
new_blob_name = new_blob_name[0] + "-pass.mlir"
turbine_tank.changeBlobName(blob_name, new_blob_name)

# accuracy
assert err < expected_err
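The hunk above renames an uploaded IR blob when the accuracy check passes, so passing and failing artifacts can be told apart in storage. A minimal standalone sketch of that logic (the `rename_blob` callback here is a hypothetical stand-in for the repo's `turbine_tank.changeBlobName`; `os.path.splitext` is used instead of `split(".")` so blob names containing extra dots are still handled):

```python
import os


def mark_blob_on_pass(blob_name, err, expected_err, rename_blob):
    """Flag an uploaded IR blob as passing by renaming it.

    If the numerical error is within tolerance, turn e.g.
    "model/model-params.mlir" into "model/model-params-pass.mlir"
    via the supplied rename callback and return the new name;
    otherwise leave the blob name unchanged.
    """
    if err < expected_err:
        stem, ext = os.path.splitext(blob_name)
        new_name = f"{stem}-pass{ext}"
        rename_blob(blob_name, new_name)
        return new_name
    return blob_name
```

In the PR itself the suffix is built with `blob_name.split(".")[0] + "-pass.mlir"`, which gives the same result for single-dot names; the `splitext` form is merely a more defensive variant.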
22 changes: 11 additions & 11 deletions turbine_tank/tank_test.py
@@ -69,7 +69,7 @@
False,
marks=pytest.mark.xfail(reason="iree-compile fails"),
),
("lmsys/vicuna-13b-v1.3", "hf", 5e-05, True),
# ("lmsys/vicuna-13b-v1.3", "hf", 5e-05, True),
pytest.param(
"microsoft/phi-1_5",
"hf_causallm",
@@ -84,13 +84,13 @@
True,
marks=pytest.mark.xfail(reason="correctness issue"),
), # nan error reported (correctness issue)
pytest.param(
"mosaicml/mpt-30b",
"hf_causallm",
-1,
False,
marks=pytest.mark.xfail(reason="iree-compile fails"),
),
# pytest.param(
# "mosaicml/mpt-30b",
# "hf_causallm",
# -1,
# False,
# marks=pytest.mark.xfail(reason="iree-compile fails"),
# ),
("stabilityai/stablelm-3b-4e1t", "hf_causallm", 0.0004, True),
],
)
@@ -121,7 +121,7 @@ def test_all_models(model_name, model_type, expected_err, compile_to_vmfb):
model = HFTransformerBuilder(
example_input=input,
hf_id=model_name,
upload_ir=True,
upload_ir=False,
model=torch_model,
model_type=model_type,
compile_to_vmfb=compile_to_vmfb,
@@ -131,5 +131,5 @@ def test_all_models(model_name, model_type, expected_err, compile_to_vmfb):
flows_util.param_flow(
model, model_name, model_type, input, out, compile_to_vmfb, expected_err
)
# inline weights
flows_util.classic_flow(model, model_name, input, out, compile_to_vmfb, expected_err)
# inline weights: not run in CI due to space/time constraints (will enable when more machines are available)
# flows_util.classic_flow(model, model_name, input, out, compile_to_vmfb, expected_err)