From 961053cfdeb35ce09bb5e464598d7433fcff0961 Mon Sep 17 00:00:00 2001
From: Vincent Tang
Date: Thu, 13 Jun 2024 22:02:21 +0000
Subject: [PATCH] #8729: xdist + reset mechanism on fd nightly, model perf, all t3k (except profiler)

- enable the timeout mechanism by default when using xdist; use the 'metal-timeout' flag to enable it when not using xdist
- increase GH Actions timeouts to accommodate xdist (review)
- report per-test timings and set the global timeout to 5 minutes (review)
- add custom timeouts to nightly + t3k pipelines + post-commit (review)
---
 ...-dispatch-full-regressions-and-models.yaml |  2 +-
 .github/workflows/perf-models.yaml            |  2 +-
 .github/workflows/t3000-demo-tests.yaml       |  1 +
 .github/workflows/t3000-model-perf-tests.yaml |  2 +-
 conftest.py                                   | 51 ++++++++---------
 pytest.ini                                    |  4 +-
 tests/scripts/run_performance.sh              | 25 +++++----
 tests/scripts/run_tests.sh                    |  2 +-
 .../single_card/nightly/run_common_models.sh  |  9 ++-
 .../single_card/nightly/run_gs_only.sh        | 11 +++-
 tests/scripts/single_card/nightly/run_ttnn.sh |  9 ++-
 .../single_card/nightly/run_wh_b0_only.sh     | 11 +++-
 .../single_card/nightly/run_wh_b0_unstable.sh |  9 ++-
 tests/scripts/t3000/run_t3000_demo_tests.sh   | 37 ++++++++----
 .../scripts/t3000/run_t3000_frequent_tests.sh | 48 ++++++++--------
 .../t3000/run_t3000_model_perf_tests.sh       | 15 +++--
 tests/scripts/t3000/run_t3000_unit_tests.sh   | 56 ++++++++++---------
 tt_metal/python_env/requirements-dev.txt      |  1 +
 18 files changed, 178 insertions(+), 117 deletions(-)

diff --git a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
index 115b9415452..b6dc4f619c5 100644
--- a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
+++ b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
@@ -26,7 +26,7 @@ jobs:
           { name: "N300 WH-only models", arch: wormhole_b0, cmd: tests/scripts/single_card/nightly/run_wh_b0_only.sh, timeout: 30 },
           { name: "API tests GS", arch: grayskull, cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast, timeout: 40 },
           { name: "API tests N300 WH B0", arch: wormhole_b0, cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast, timeout: 40 },
-          { name: "[Unstable] N300 models", arch: wormhole_b0, cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, timeout: 35 },
+          { name: "[Unstable] N300 models", arch: wormhole_b0, cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, timeout: 45 },
         ]
     name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }}
     env:
diff --git a/.github/workflows/perf-models.yaml b/.github/workflows/perf-models.yaml
index 8c423e865c1..f5905175e7e 100644
--- a/.github/workflows/perf-models.yaml
+++ b/.github/workflows/perf-models.yaml
@@ -52,7 +52,7 @@ jobs:
       - uses: ./.github/actions/install-python-deps
       - name: Run performance regressions
         id: performance_tests
-        timeout-minutes: 30
+        timeout-minutes: 40
         run: |
           source ${{ github.workspace }}/python_env/bin/activate
           ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.test-info.machine-type }}
diff --git a/.github/workflows/t3000-demo-tests.yaml b/.github/workflows/t3000-demo-tests.yaml
index c7c7d218806..bcaf4135df9 100644
--- a/.github/workflows/t3000-demo-tests.yaml
+++ b/.github/workflows/t3000-demo-tests.yaml
@@ -45,6 +45,7 @@ jobs:
         run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
       - uses: ./.github/actions/install-python-deps
       - name: Run demo regression tests
+        shell: bash {0}
         timeout-minutes: 180
         run: |
           source ${{ github.workspace }}/python_env/bin/activate
diff --git a/.github/workflows/t3000-model-perf-tests.yaml b/.github/workflows/t3000-model-perf-tests.yaml
index 7a4ad87cd8c..a4588eaf22b 100644
--- a/.github/workflows/t3000-model-perf-tests.yaml
+++ b/.github/workflows/t3000-model-perf-tests.yaml
@@ -61,7 +61,7 @@ jobs:
         run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
       - uses: ./.github/actions/install-python-deps
       - name: Run model perf regression tests
-        timeout-minutes: 60
+        timeout-minutes: 75
         run: |
           source ${{ github.workspace }}/python_env/bin/activate
           cd $TT_METAL_HOME
diff --git a/conftest.py b/conftest.py
index c6339ee3ae1..2a8c0b44d5f 100644
--- a/conftest.py
+++ b/conftest.py
@@ -85,8 +85,6 @@ def device(request, device_params):
     import tt_lib as ttl
 
     device_id = request.config.getoption("device_id")
-
-    request.node.device_ids = [device_id]
     request.node.pci_ids = [ttl.device.GetPCIeDeviceID(device_id)]
 
     num_devices = ttl.device.GetNumPCIeDevices()
@@ -108,8 +106,6 @@ def pcie_devices(request, device_params):
     num_devices = ttl.device.GetNumPCIeDevices()
     device_ids = [i for i in range(num_devices)]
-
-    request.node.device_ids = device_ids
     request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids]
 
     # Get only physical devices
@@ -129,8 +125,6 @@ def all_devices(request, device_params):
     num_devices = ttl.device.GetNumAvailableDevices()
     device_ids = [i for i in range(num_devices)]
-
-    request.node.device_ids = device_ids
    request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids]
 
     # Get only physical devices
@@ -155,7 +149,6 @@ def device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device_params):
     except (ValueError, AttributeError):
         num_devices_requested = len(device_ids)
 
-    request.node.device_ids = device_ids[:num_devices_requested]
     request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids[:num_devices_requested]]
 
     device_mesh = ttnn.open_device_mesh(
@@ -183,7 +176,6 @@ def pcie_device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device_params):
     except (ValueError, AttributeError):
         num_pcie_devices_requested = len(device_ids)
 
-    request.node.device_ids = device_ids[:num_pcie_devices_requested]
     request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids[:num_pcie_devices_requested]]
 
     device_mesh = ttnn.open_device_mesh(
@@ -213,7 +205,6 @@ def t3k_device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device_params):
     except (ValueError, AttributeError):
         num_devices_requested = len(device_ids)
 
-    request.node.device_ids = device_ids[:num_devices_requested]
     request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids[:num_devices_requested]]
 
     device_mesh = ttnn.open_device_mesh(
@@ -334,13 +325,18 @@ def pytest_addoption(parser):
     )
     parser.addoption("--cli-input", action="store", default=None, help="Enter prompt if --input-method=cli")
     parser.addoption(
-        "--metal-cleanup",
+        "--metal-timeout",
         action="store",
         default=None,
         help="Enable process timeout",
     )
 
 
+@pytest.fixture
+def input_path(request):
+    return request.config.getoption("--input-path")
+
+
 def pytest_generate_tests(metafunc):
     """
     This is not a standard docstring.
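The conftest.py hunks that follow gate both the new teardown reset and the pytest-timeout override on the same two signals: the explicit --metal-timeout option added above, or the presence of pytest-xdist. As a minimal standalone sketch of just that gating (the helper name is hypothetical and not part of the patch; PYTEST_XDIST_WORKER_COUNT is the environment variable pytest-xdist exports in each worker process):

import os

def custom_timeout_enabled(config) -> bool:
    # Explicit opt-in via --metal-timeout, or implicit whenever pytest-xdist
    # is driving the run (xdist exports PYTEST_XDIST_WORKER_COUNT to workers).
    explicit = config.getoption("--metal-timeout") is not None
    under_xdist = int(os.getenv("PYTEST_XDIST_WORKER_COUNT", "0")) > 0
    return explicit or under_xdist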
@@ -473,14 +469,15 @@ def pytest_runtest_makereport(item, call):
 @pytest.hookimpl(hookwrapper=True)
 def pytest_runtest_teardown(item, nextitem):
     yield
-    metal_cleanup_enabled = item.config.getoption("--metal-cleanup")
-    if metal_cleanup_enabled is not None:
+    metal_timeout_enabled = item.config.getoption("--metal-timeout")
+    using_xdist = int(os.getenv("PYTEST_XDIST_WORKER_COUNT", "0"))
+
+    if metal_timeout_enabled is not None or using_xdist:
         report = item.stash[phase_report_key]
         test_failed = report.get("call", None) and report["call"].failed
         if test_failed:
-            logger.info(f"In custom teardown, open device ids: {item.device_ids} {set(item.pci_ids)}")
-            # reset_tensix(set(item.pci_ids))
-            reset_tensix()
+            logger.info(f"In custom teardown, open pci device ids: {set(item.pci_ids)}")
+            reset_tensix(set(item.pci_ids))
 
 
 # This is overriding the timer setup hook from pytest-timeout
@@ -488,10 +485,12 @@ def pytest_runtest_teardown(item, nextitem):
 # At timeout, the process kills it's parent (the test process) and then itself
 @pytest.hookimpl(tryfirst=True)
 def pytest_timeout_set_timer(item, settings):
-    metal_timeout_enabled = item.config.getoption("--metal-cleanup")
-    if metal_timeout_enabled is not None:
+    metal_timeout_enabled = item.config.getoption("--metal-timeout")
+    using_xdist = int(os.getenv("PYTEST_XDIST_WORKER_COUNT", "0"))
+
+    if metal_timeout_enabled is not None or using_xdist:
         parent_pid = os.getpid()
-        logger.info(f"Metal timeout {settings.timeout} seconds")
+        logger.info(f"Metal timeout of {settings.timeout} seconds (pid {parent_pid}) for {item.nodeid}")
 
         def get_parent_status():
             try:
@@ -501,12 +500,15 @@ def get_parent_status():
             return parent.status()
 
         def run_timer(settings):
+            logger.info(f"Timer started for {item.nodeid}")
             dead_status = ["zombie", "dead", "already dead"]
             timeout = settings.timeout
-            while get_parent_status() not in dead_status and timeout > 0:
-                time.sleep(1)
-                timeout -= 1
-            if get_parent_status() != "already dead":
+            parent_status = "running"
+            while parent_status not in dead_status and timeout > 0:
+                time.sleep(5)
+                timeout -= 5
+                parent_status = get_parent_status()
+            if parent_status != "already dead":
                 logger.info(f"Timing out test case")
                 os.kill(parent_pid, signal.SIGKILL)
             logger.info(f"Killing timer")
@@ -542,10 +544,9 @@ def reset_tensix(tt_open_devices=None):
         smi_reset_result = run_process_and_get_result(f"/opt/tt_metal_infra/scripts/ci/{arch}/reset.sh")
     else:
         tt_open_devices_str = ",".join([str(i) for i in tt_open_devices])
-        check_smi = run_process_and_get_result("tt-smi-metal -h")
-        logger.info(f"Check tt-smi-metal exists: {check_smi.returncode}")
+        check_smi_metal = run_process_and_get_result("tt-smi-metal -h")
         logger.info(f"Running reset for pci devices: {tt_open_devices_str}")
-        if check_smi.returncode > 0:
+        if check_smi_metal.returncode > 0:
             logger.info(f"Test failed - resetting {arch} with tt-smi")
             smi_reset_result = run_process_and_get_result(f"tt-smi -r {tt_open_devices_str}")
         else:
diff --git a/pytest.ini b/pytest.ini
index 593b9ce3930..699ef215218 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,7 +1,7 @@
 [pytest]
-timeout = 2400
+timeout = 300
 minversion = 7.2
-addopts = --import-mode=importlib -vs -rA
+addopts = --import-mode=importlib -vvs -rA --durations=0
 empty_parameter_set_mark = skip
 markers =
     post_commit: mark tests to run on post-commit
diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh
index b07cc38166c..dc9e7c35ae7 100755
--- a/tests/scripts/run_performance.sh
+++ b/tests/scripts/run_performance.sh
@@ -1,6 +1,6 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 if [[ -z "$TT_METAL_HOME" ]]; then
     echo "Must provide TT_METAL_HOME in environment" 1>&2
     exit 1
 fi
@@ -11,19 +11,19 @@ run_perf_models_other() {
     local tt_arch=$1
     local test_marker=$2
 
-    env pytest tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker
+    env pytest -n auto tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker
 
-    env pytest tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker
+    env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker
 
-    env pytest models/demos/ttnn_falcon7b/tests -m $test_marker
+    env pytest -n auto models/demos/ttnn_falcon7b/tests -m $test_marker
 
     # Separate calls since we can't mix switching between number of cqs
-    env pytest models/demos/resnet/tests/test_perf_resnet.py -m $test_marker
-    env pytest models/demos/resnet/tests/test_perf_resnet_2cqs.py -m $test_marker
+    env pytest -n auto models/demos/resnet/tests/test_perf_resnet.py -m $test_marker
+    env pytest -n auto models/demos/resnet/tests/test_perf_resnet_2cqs.py -m $test_marker
 
-    env pytest tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker
+    env pytest -n auto tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker
 
-    env pytest models/demos/metal_BERT_large_11/tests -m $test_marker
+    env pytest -n auto models/demos/metal_BERT_large_11/tests -m $test_marker
 
     ## Merge all the generated reports
     env python models/perf/merge_perf_results.py
@@ -33,13 +33,13 @@ run_perf_models_llm_javelin() {
     local tt_arch=$1
     local test_marker=$2
 
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m $test_marker
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m $test_marker
 
     if [ "$tt_arch" == "wormhole_b0" ]; then
-        env pytest models/demos/mamba/tests -m $test_marker
+        env pytest -n auto models/demos/mamba/tests -m $test_marker
     fi
 
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/mistral7b/tests -m $test_marker
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/wormhole/mistral7b/tests -m $test_marker
 
     ## Merge all the generated reports
     env python models/perf/merge_perf_results.py
@@ -50,7 +50,7 @@ run_perf_models_cnn_javelin() {
     local test_marker=$2
 
     # Run tests
-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/device_perf_tests/stable_diffusion -m $test_marker
+    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/device_perf_tests/stable_diffusion -m $test_marker
     #env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker
 
     ## Merge all the generated reports
@@ -58,6 +58,7 @@ run_device_perf_models() {
+    set -eo pipefail
     local test_marker=$1
 
     env pytest tests/device_perf_tests/stable_diffusion -m $test_marker
diff --git a/tests/scripts/run_tests.sh b/tests/scripts/run_tests.sh
index 334b68b71fd..ebd25264b9c 100755
--- a/tests/scripts/run_tests.sh
+++ b/tests/scripts/run_tests.sh
@@ -81,7 +81,7 @@ run_frequent_api_pipeline_tests() {
         ./tests/scripts/run_python_api_unit_tests.sh
     else
         if [[ $tt_arch == "wormhole_b0" ]]; then
-            pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k nightly
+            pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k nightly
         else
             echo "API tests are not available for fast dispatch because they're already covered in post-commit"
         fi
diff --git a/tests/scripts/single_card/nightly/run_common_models.sh b/tests/scripts/single_card/nightly/run_common_models.sh
index 17ca8c4d3cf..19e090065f3 100755
--- a/tests/scripts/single_card/nightly/run_common_models.sh
+++ b/tests/scripts/single_card/nightly/run_common_models.sh
@@ -1,12 +1,17 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 if [[ -z "$TT_METAL_HOME" ]]; then
   echo "Must provide TT_METAL_HOME in environment" 1>&2
   exit 1
 fi
 
+fail=0
 echo "Running common models for archs"
 
-env pytest tests/nightly/common_models/
+env pytest -n auto tests/nightly/common_models/ ; fail+=$?
+
+if [[ $fail -ne 0 ]]; then
+  exit 1
+fi
diff --git a/tests/scripts/single_card/nightly/run_gs_only.sh b/tests/scripts/single_card/nightly/run_gs_only.sh
index c5bcc9f9745..bad5b98ea40 100755
--- a/tests/scripts/single_card/nightly/run_gs_only.sh
+++ b/tests/scripts/single_card/nightly/run_gs_only.sh
@@ -1,14 +1,19 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 if [[ -z "$TT_METAL_HOME" ]]; then
   echo "Must provide TT_METAL_HOME in environment" 1>&2
   exit 1
 fi
 
+fail=0
 echo "Running model nightly tests for GS only"
 
-env pytest models/demos/resnet/tests/test_metal_resnet50_performant.py
+env pytest -n auto models/demos/resnet/tests/test_metal_resnet50_performant.py ; fail+=$?
 
-env pytest models/demos/resnet/tests/test_metal_resnet50_2cqs_performant.py
+env pytest -n auto models/demos/resnet/tests/test_metal_resnet50_2cqs_performant.py ; fail+=$?
+
+if [[ $fail -ne 0 ]]; then
+  exit 1
+fi
diff --git a/tests/scripts/single_card/nightly/run_ttnn.sh b/tests/scripts/single_card/nightly/run_ttnn.sh
index f0bb3f9cadc..a41836173de 100755
--- a/tests/scripts/single_card/nightly/run_ttnn.sh
+++ b/tests/scripts/single_card/nightly/run_ttnn.sh
@@ -1,12 +1,17 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 if [[ -z "$TT_METAL_HOME" ]]; then
   echo "Must provide TT_METAL_HOME in environment" 1>&2
   exit 1
 fi
 
+fail=0
 echo "Running ttnn nightly tests for GS only"
 
-env pytest tests/ttnn/integration_tests -m "not models_performance_bare_metal and not models_device_performance_bare_metal"
+env pytest -n auto tests/ttnn/integration_tests -m "not models_performance_bare_metal and not models_device_performance_bare_metal" ; fail+=$?
+
+if [[ $fail -ne 0 ]]; then
+  exit 1
+fi
diff --git a/tests/scripts/single_card/nightly/run_wh_b0_only.sh b/tests/scripts/single_card/nightly/run_wh_b0_only.sh
index d30894713c1..5ae9f0657cb 100755
--- a/tests/scripts/single_card/nightly/run_wh_b0_only.sh
+++ b/tests/scripts/single_card/nightly/run_wh_b0_only.sh
@@ -1,12 +1,17 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 if [[ -z "$TT_METAL_HOME" ]]; then
   echo "Must provide TT_METAL_HOME in environment" 1>&2
   exit 1
 fi
 
+fail=0
 echo "Running nightly tests for WH B0 only"
 
-env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/nightly/wh_b0_only_eth
-env pytest tests/nightly/wh_b0_only
\ No newline at end of file
+env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/nightly/wh_b0_only_eth ; fail+=$?
+env pytest -n auto tests/nightly/wh_b0_only ; fail+=$?
+
+if [[ $fail -ne 0 ]]; then
+  exit 1
+fi
diff --git a/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh b/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
index 079087d6e69..35895a64208 100755
--- a/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
+++ b/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh
@@ -1,12 +1,17 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 if [[ -z "$TT_METAL_HOME" ]]; then
   echo "Must provide TT_METAL_HOME in environment" 1>&2
   exit 1
 fi
 
+fail=0
 echo "Running unstable nightly tests for WH B0 only"
 
-SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml env pytest tests/ttnn/integration_tests/stable_diffusion
+SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml env pytest -n auto tests/ttnn/integration_tests/stable_diffusion ; fail+=$?
+
+if [[ $fail -ne 0 ]]; then
+  exit 1
+fi
diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh
index bb2a23d9efd..d0f984dcfcb 100755
--- a/tests/scripts/t3000/run_t3000_demo_tests.sh
+++ b/tests/scripts/t3000/run_t3000_demo_tests.sh
@@ -1,60 +1,72 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 run_t3000_falcon40b_tests() {
   # Record the start time
+  fail=0
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
   # Falcon40B prefill 60 layer end to end with 10 loops; we need 8x8 grid size
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py ; fail+=$?
 
   # Falcon40B end to end demo (prefill + decode)
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
   duration=$((end_time - start_time))
   echo "LOG_METAL: run_t3000_falcon40b_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 run_t3000_falcon7b_tests(){
   # Record the start time
+  fail=0
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
   # Falcon7B demo (perf verification for 128/1024/2048 seq lens and output token verification)
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_128_stochastic_verify]
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_1024_stochastic_verify]
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_2048_stochastic_verify]
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_1024_greedy_verify]
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_128_stochastic_verify] ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_1024_stochastic_verify] ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_2048_stochastic_verify] ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_1024_greedy_verify] ; fail+=$?
 
   # Falcon7B perplexity test (prefill and decode)
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram]
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-decode_1024_l1_sharded]
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram] ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-decode_1024_l1_sharded] ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
   duration=$((end_time - start_time))
   echo "LOG_METAL: run_t3000_falcon7b_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 run_t3000_mixtral_tests() {
   # Record the start time
+  fail=0
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_mixtral8x7b_tests"
 
   # mixtral8x7b 8 chip demo test - 100 token generation with general weights (env flags set inside the test)
-  pytest models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights]
+  pytest -n auto models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights] ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
   duration=$((end_time - start_time))
   echo "LOG_METAL: run_t3000_mixtral_tests $duration seconds to complete"
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 run_t3000_tests() {
@@ -68,6 +80,7 @@ run_t3000_tests() {
   run_t3000_mixtral_tests
 }
 
+fail=0
 main() {
   if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then
     echo "Script is being sourced, not executing main function"
@@ -89,6 +102,10 @@ main() {
   export PYTHONPATH=$TT_METAL_HOME
 
   run_t3000_tests
+
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 main "$@"
diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh
index 55c7bb20370..203ffd436d6 100755
--- a/tests/scripts/t3000/run_t3000_frequent_tests.sh
+++ b/tests/scripts/t3000/run_t3000_frequent_tests.sh
@@ -1,6 +1,6 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 run_t3000_ethernet_tests() {
   # Record the start time
@@ -8,8 +8,8 @@ run_t3000_ethernet_tests() {
 
   echo "LOG_METAL: Running run_t3000_ethernet_tests"
 
-  pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py
-  pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py
+  pytest -n auto tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py ; fail+=$?
+  pytest -n auto tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -28,10 +28,10 @@ run_t3000_llama2_70b_tests() {
   export LLAMA_TOKENIZER_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/tokenizer.model
   export LLAMA_CACHE_PATH=/mnt/MLPerf/tt_dnn-models/llama-2/llama-data-cache/weights-cache-2
 
-  pytest models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py
-  pytest models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py
-  pytest models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py
-  pytest models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py
+  pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py ; fail+=$?
+  pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py ; fail+=$?
+  pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py ; fail+=$?
+  pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -48,8 +48,8 @@ run_t3000_llama2_70b_experimental_tests() {
   # Removing tests to reduce the time taken to run the tests
   # WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_mlp_t3000.py
   # WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_attention_t3000.py
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_decoder_t3000.py
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_model_t3000.py
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_decoder_t3000.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_model_t3000.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -64,7 +64,7 @@ run_t3000_mixtral_tests() {
   echo "LOG_METAL: Running run_t3000_mixtral_tests"
 
   # mixtral8x7b 8 chip decode model test (env flags set inside the test)
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc]
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc] ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -78,8 +78,8 @@ run_t3000_tteager_tests() {
 
   echo "LOG_METAL: Running run_t3000_tteager_tests"
 
-  pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit
-  pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_reduce_scatter_post_commit.py
+  pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit ; fail+=$?
+  pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_reduce_scatter_post_commit.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -91,9 +91,8 @@ run_t3000_trace_stress_tests() {
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_trace_stress_tests"
-
-  NUM_TRACE_LOOPS=15 pytest tests/ttnn/unit_tests/test_multi_device_trace.py
-  NUM_TRACE_LOOPS=15 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/test_multi_device_trace.py
+  NUM_TRACE_LOOPS=15 pytest -n auto tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$?
+  NUM_TRACE_LOOPS=15 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -108,10 +107,10 @@ run_t3000_falcon40b_tests() {
 
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_mlp.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_attention.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_decoder.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_causallm.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -126,9 +125,6 @@ run_t3000_tests() {
   # Run tteager tests
   run_t3000_tteager_tests
 
-  # Run trace tests
-  run_t3000_trace_stress_tests
-
   # Run llama2-70b experimental tests
   run_t3000_llama2_70b_experimental_tests
 
@@ -141,8 +137,12 @@ run_t3000_tests() {
   # Run mixtral tests
   run_t3000_mixtral_tests
 
+  # Run trace tests
+  run_t3000_trace_stress_tests
+
 }
 
+fail=0
 main() {
   if [[ -z "$TT_METAL_HOME" ]]; then
     echo "Must provide TT_METAL_HOME in environment" 1>&2
@@ -159,6 +159,10 @@ main() {
   export PYTHONPATH=$TT_METAL_HOME
 
   run_t3000_tests
+
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 main "$@"
diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
index 11fe89f1f5c..4176d15afb5 100755
--- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh
+++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh
@@ -1,6 +1,6 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 run_t3000_falcon7b_tests() {
   # Record the start time
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
-  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m "model_perf_t3000"
+  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m "model_perf_t3000" ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -22,7 +22,7 @@ run_t3000_mixtral_tests() {
 
   echo "LOG_METAL: Running run_t3000_mixtral_tests"
 
-  env pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py::test_mixtral_model_perf[wormhole_b0-True-2048-150-0.025] -m "model_perf_t3000"
+  env pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py::test_mixtral_model_perf[wormhole_b0-True-2048-150-0.025] -m "model_perf_t3000" ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -36,7 +36,7 @@ run_t3000_llama2_70b_tests() {
 
   echo "LOG_METAL: Running run_t3000_llama2_70b_tests"
 
-  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000"
+  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/experimental/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000" ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -50,7 +50,7 @@ run_t3000_falcon40b_tests() {
 
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
-  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000"
+  env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000" ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -80,6 +80,7 @@ run_t3000_cnn_tests() {
   env python models/perf/merge_perf_results.py
 }
 
+fail=0
 main() {
   # Parse the arguments
   while [[ $# -gt 0 ]]; do
@@ -123,6 +124,10 @@ main() {
     echo "$pipeline_type is invalid (supported: [cnn_model_perf_t3000_device, cnn_model_perf_t3000_device])" 2>&1
     exit 1
   fi
+
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 main "$@"
diff --git a/tests/scripts/t3000/run_t3000_unit_tests.sh b/tests/scripts/t3000/run_t3000_unit_tests.sh
index a1ae782705e..358ef450520 100755
--- a/tests/scripts/t3000/run_t3000_unit_tests.sh
+++ b/tests/scripts/t3000/run_t3000_unit_tests.sh
@@ -1,6 +1,6 @@
 #/bin/bash
-set -eo pipefail
+# set -eo pipefail
 
 run_t3000_ttmetal_tests() {
   # Record the start time
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_ttmetal_tests"
 
-  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectSendAllConnectedChips"
-  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsSendInterleavedBufferAllConnectedChips"
-  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectRingGatherAllChips"
-  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsInterleavedRingGatherAllChips"
-  TT_METAL_ENABLE_REMOTE_CHIP=1 ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueSingleCardFixture.*"
-  ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueMultiDeviceFixture.*"
-  ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="DPrintFixture.*:WatcherFixture.*"
+  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectSendAllConnectedChips" ; fail+=$?
+  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsSendInterleavedBufferAllConnectedChips" ; fail+=$?
+  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectRingGatherAllChips" ; fail+=$?
+  TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsInterleavedRingGatherAllChips" ; fail+=$?
+  TT_METAL_ENABLE_REMOTE_CHIP=1 ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueSingleCardFixture.*" ; fail+=$?
+  ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueMultiDeviceFixture.*" ; fail+=$?
+  ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="DPrintFixture.*:WatcherFixture.*" ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -27,10 +27,10 @@ run_t3000_ttnn_tests() {
   start_time=$(date +%s)
 
   echo "LOG_METAL: Running run_t3000_ttnn_tests"
-  pytest tests/ttnn/unit_tests/test_multi_device_trace.py
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/test_multi_device_trace.py
-  pytest tests/ttnn/unit_tests/test_multi_device.py
-  pytest tests/ttnn/unit_tests/test_multi_device_async.py
+  pytest -n auto tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$?
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$?
+  pytest -n auto tests/ttnn/unit_tests/test_multi_device.py ; fail+=$?
+  pytest -n auto tests/ttnn/unit_tests/test_multi_device_async.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
   duration=$((end_time - start_time))
@@ -43,9 +43,9 @@ run_t3000_falcon7b_tests() {
 
   echo "LOG_METAL: Running run_t3000_falcon7b_tests"
 
-  pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_mlp.py
-  pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_attention.py
-  pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_decoder.py
+  pytest -n auto models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_mlp.py ; fail+=$?
+  pytest -n auto models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_attention.py ; fail+=$?
+  pytest -n auto models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_decoder.py ; fail+=$?
   #pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_causallm.py
 
   # Record the end time
@@ -60,7 +60,7 @@ run_t3000_falcon40b_tests() {
 
   echo "LOG_METAL: Running run_t3000_falcon40b_tests"
 
-  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_1_layer_t3000.py
+  WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_1_layer_t3000.py ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -74,13 +74,13 @@ run_t3000_mixtral_tests() {
 
   echo "LOG_METAL: Running run_t3000_mixtral_tests"
 
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_attention.py
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_mlp.py
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_rms_norm.py
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py
-  pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-1-1-pcc]
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_attention.py ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_mlp.py ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_rms_norm.py ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py ; fail+=$?
+  pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-1-1-pcc] ; fail+=$?
 
   # Record the end time
   end_time=$(date +%s)
@@ -89,8 +89,6 @@ run_t3000_mixtral_tests() {
 
 run_t3000_tests() {
-  # Run ttmetal tests
-  run_t3000_ttmetal_tests
 
   # Run ttnn tests
   run_t3000_ttnn_tests
@@ -103,8 +101,12 @@ run_t3000_tests() {
 
   # Run mixtral tests
   run_t3000_mixtral_tests
+
+  # Run ttmetal tests
+  run_t3000_ttmetal_tests
 }
 
+fail=0
 main() {
   if [[ -z "$TT_METAL_HOME" ]]; then
     echo "Must provide TT_METAL_HOME in environment" 1>&2
@@ -121,6 +123,10 @@ main() {
   export PYTHONPATH=$TT_METAL_HOME
 
   run_t3000_tests
+
+  if [[ $fail -ne 0 ]]; then
+    exit 1
+  fi
 }
 
 main "$@"
diff --git a/tt_metal/python_env/requirements-dev.txt b/tt_metal/python_env/requirements-dev.txt
index f7f90202919..5a6cf7ebb88 100644
--- a/tt_metal/python_env/requirements-dev.txt
+++ b/tt_metal/python_env/requirements-dev.txt
@@ -21,6 +21,7 @@ mypy==1.9.0
 pytest==7.2.2
 pytest-timeout==2.2.0
 pytest-split==0.8.2
+pytest-xdist==3.6.1
 jsbeautifier==1.14.7
 datasets==2.9.0
 torch==2.2.1.0+cpu
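Taken together, the conftest.py hunks replace pytest-timeout's in-process timer with a watchdog that runs beside the test process (per the comment in the hook, it kills its parent, the test process, and then itself), polling in 5-second steps against the 300-second default now set in pytest.ini. Below is a consolidated sketch of that loop, not the patch itself: the function name is hypothetical, how the watchdog process is spawned lies outside the hunks shown, and the exception handling is assumed since the except clause is not visible in the diff. psutil and loguru are used as they are elsewhere in conftest.py.

import os
import signal
import time

import psutil
from loguru import logger

def watchdog_loop(test_pid, nodeid, timeout_s, poll_s=5):
    # Poll the test process; if it is still alive when the budget runs out,
    # SIGKILL it so the wrapper script records the failure and the teardown
    # path can reset the chips with tt-smi.
    dead_status = ["zombie", "dead", "already dead"]

    def test_status():
        try:
            return psutil.Process(test_pid).status()
        except psutil.NoSuchProcess:  # assumed; the except clause is elided in the diff
            return "already dead"

    status, remaining = "running", timeout_s
    while status not in dead_status and remaining > 0:
        time.sleep(poll_s)
        remaining -= poll_s
        status = test_status()
    if status != "already dead":
        logger.info(f"Timing out {nodeid} after {timeout_s} seconds")
        os.kill(test_pid, signal.SIGKILL)

This is also why the shell runners above trade set -eo pipefail for the fail accumulator: a timed-out test now surfaces as an ordinary nonzero pytest exit code, so the remaining suites still run and each script exits 1 at the end if anything failed.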