From d525b1710a5201c0329ffba5686c40217be0b414 Mon Sep 17 00:00:00 2001
From: Vincent Tang
Date: Thu, 13 Jun 2024 22:02:21 +0000
Subject: [PATCH] #8729: xdist + reset mechanism on fd nightly, model perf, all t3k (except profiler)

- enable the timeout mechanism by default when running under xdist; pass the
  '--metal-timeout' flag to enable it for non-xdist runs
- increase GH Actions timeout for xdist runs (review)
- report per-test timings and set the global timeout to 5 minutes (review)
- add custom timeouts to nightly + t3k pipelines + post-commit (review)
---
 ...-dispatch-full-regressions-and-models.yaml |  2 +-
 .github/workflows/perf-models.yaml            |  2 +-
 .github/workflows/t3000-demo-tests.yaml       |  3 +-
 .github/workflows/t3000-frequent-tests.yaml   |  1 +
 .github/workflows/t3000-model-perf-tests.yaml |  9 +--
 .github/workflows/t3000-unit-tests.yaml       |  1 +
 conftest.py                                   | 59 ++++++++--------
 pytest.ini                                    |  2 +-
 tests/scripts/run_performance.sh              | 25 +++----
 tests/scripts/run_tests.sh                    |  2 +-
 .../single_card/nightly/run_common_models.sh  |  9 ++-
 .../single_card/nightly/run_gs_only.sh        | 11 ++-
 tests/scripts/single_card/nightly/run_ttnn.sh |  9 ++-
 .../single_card/nightly/run_wh_b0_only.sh     | 11 ++-
 .../single_card/nightly/run_wh_b0_unstable.sh |  9 ++-
 tests/scripts/t3000/run_t3000_demo_tests.sh   | 37 +++++++---
 .../scripts/t3000/run_t3000_frequent_tests.sh | 68 +++++++++++++------
 .../t3000/run_t3000_model_perf_tests.sh       | 31 +++++++--
 tests/scripts/t3000/run_t3000_unit_tests.sh   | 66 ++++++++++++------
 tt_metal/python_env/requirements-dev.txt      |  1 +
 20 files changed, 238 insertions(+), 120 deletions(-)

diff --git a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
index 115b9415452..b6dc4f619c5 100644
--- a/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
+++ b/.github/workflows/fast-dispatch-full-regressions-and-models.yaml
@@ -26,7 +26,7 @@ jobs:
           { name: "N300 WH-only models", arch: wormhole_b0, cmd: tests/scripts/single_card/nightly/run_wh_b0_only.sh, timeout: 30 },
           { name: "API tests GS", arch: grayskull, cmd: ./tests/scripts/run_tests.sh --tt-arch grayskull --pipeline-type frequent_api --dispatch-mode fast, timeout: 40 },
           { name: "API tests N300 WH B0", arch: wormhole_b0, cmd: ./tests/scripts/run_tests.sh --tt-arch wormhole_b0 --pipeline-type frequent_api --dispatch-mode fast, timeout: 40 },
-          { name: "[Unstable] N300 models", arch: wormhole_b0, cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, timeout: 35 },
+          { name: "[Unstable] N300 models", arch: wormhole_b0, cmd: tests/scripts/single_card/nightly/run_wh_b0_unstable.sh, timeout: 45 },
         ]
     name: FD ${{ matrix.test-group.name }} ${{ matrix.test-group.arch }}
     env:
diff --git a/.github/workflows/perf-models.yaml b/.github/workflows/perf-models.yaml
index 8c423e865c1..f5905175e7e 100644
--- a/.github/workflows/perf-models.yaml
+++ b/.github/workflows/perf-models.yaml
@@ -52,7 +52,7 @@ jobs:
       - uses: ./.github/actions/install-python-deps
       - name: Run performance regressions
         id: performance_tests
-        timeout-minutes: 30
+        timeout-minutes: 40
        run: |
          source ${{ github.workspace }}/python_env/bin/activate
          ./tests/scripts/run_tests.sh --tt-arch $ARCH_NAME --pipeline-type ${{ matrix.model-type }}_models_performance_${{ matrix.test-info.machine-type }}
diff --git a/.github/workflows/t3000-demo-tests.yaml b/.github/workflows/t3000-demo-tests.yaml
index a05a651f0c5..ca524dd3a8a 100644
--- a/.github/workflows/t3000-demo-tests.yaml
+++ b/.github/workflows/t3000-demo-tests.yaml
@@ -17,7 +17,7 @@ jobs:
      fail-fast: false
      matrix:
        test-group: [
-          { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 40, owner_id: U044T8U8DEF}, #Johanna Rock
+          { name: "t3k falcon40b tests", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 50, owner_id: U044T8U8DEF}, #Johanna Rock
           { name: "t3k llama3_70b tests", arch: wormhole_b0, cmd: run_t3000_llama3_70b_tests, timeout: 30, owner_id: U03FJB5TM5Y}, #Colman Glagovich
           { name: "t3k falcon7b tests", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 90, owner_id: U05RWH3QUPM}, #Salar Hosseini
           { name: "t3k mixtral tests", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 30, owner_id: U03PUAKE719}, # Miguel Tairum
@@ -46,6 +46,7 @@ jobs:
         run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
       - uses: ./.github/actions/install-python-deps
       - name: Run demo regression tests
+        shell: bash {0}
         timeout-minutes: ${{ matrix.test-group.timeout }}
         run: |
           source ${{ github.workspace }}/python_env/bin/activate
diff --git a/.github/workflows/t3000-frequent-tests.yaml b/.github/workflows/t3000-frequent-tests.yaml
index d6feebce9df..70a13c371f5 100644
--- a/.github/workflows/t3000-frequent-tests.yaml
+++ b/.github/workflows/t3000-frequent-tests.yaml
@@ -42,6 +42,7 @@ jobs:
         run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar
       - uses: ./.github/actions/install-python-deps
       - name: Run frequent regression tests
+        shell: bash {0}
         timeout-minutes: ${{ matrix.test-group.timeout }}
         run: |
           source ${{ github.workspace }}/python_env/bin/activate
diff --git a/.github/workflows/t3000-model-perf-tests.yaml b/.github/workflows/t3000-model-perf-tests.yaml
index 3edeb388469..4995b036238 100644
--- a/.github/workflows/t3000-model-perf-tests.yaml
+++ b/.github/workflows/t3000-model-perf-tests.yaml
@@ -17,10 +17,10 @@ jobs:
     fail-fast: false
     matrix:
       test-group: [
-        { name: "t3k LLM falcon7b model perf tests", model: "falcob7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 60, owner_id: S07AJBTLX2L}, #Model Falcon
-        { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 60, owner_id: U03PUAKE719}, # Miguel Tairum
-        { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 60, owner_id: U03FJB5TM5Y}, #Colman Glagovich
-        { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 60, owner_id: S07AJBTLX2L}, # Model Falcon
+        { name: "t3k LLM falcon7b model perf tests", model: "falcon7b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon7b_tests, timeout: 75, owner_id: S07AJBTLX2L}, #Model Falcon
+        { name: "t3k LLM mixtral model perf tests", model: "mixtral", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_mixtral_tests, timeout: 75, owner_id: U03PUAKE719}, # Miguel Tairum
+        { name: "t3k LLM llama2 model perf tests", model: "llama2", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_llama2_70b_tests, timeout: 75, owner_id: U03FJB5TM5Y}, #Colman Glagovich
+        { name: "t3k LLM falcon40b model perf tests", model: "falcon40b", model-type: "LLM", arch: wormhole_b0, cmd: run_t3000_falcon40b_tests, timeout: 75, owner_id: S07AJBTLX2L}, # Model Falcon
        #{ name: "t3k CNN model perf tests ", model-type: "CNN", arch: wormhole_b0, cmd: run_t3000_cnn_tests, timeout: 120, owner_id: }, #No tests are being run?
] name: ${{ matrix.test-group.name }} @@ -52,6 +52,7 @@ jobs: run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - uses: ./.github/actions/install-python-deps - name: Run model perf regression tests + shell: bash {0} timeout-minutes: ${{ matrix.test-group.timeout }} run: | source ${{ github.workspace }}/python_env/bin/activate diff --git a/.github/workflows/t3000-unit-tests.yaml b/.github/workflows/t3000-unit-tests.yaml index 5b9e99baaa2..297b863399f 100644 --- a/.github/workflows/t3000-unit-tests.yaml +++ b/.github/workflows/t3000-unit-tests.yaml @@ -43,6 +43,7 @@ jobs: run: tar -xvf ttm_${{ matrix.test-group.arch }}.tar - uses: ./.github/actions/install-python-deps - name: Run unit regression tests + shell: bash {0} timeout-minutes: ${{ matrix.test-group.timeout }} run: | source ${{ github.workspace }}/python_env/bin/activate diff --git a/conftest.py b/conftest.py index c6339ee3ae1..cbbda1b9e72 100644 --- a/conftest.py +++ b/conftest.py @@ -85,8 +85,6 @@ def device(request, device_params): import tt_lib as ttl device_id = request.config.getoption("device_id") - - request.node.device_ids = [device_id] request.node.pci_ids = [ttl.device.GetPCIeDeviceID(device_id)] num_devices = ttl.device.GetNumPCIeDevices() @@ -108,9 +106,7 @@ def pcie_devices(request, device_params): num_devices = ttl.device.GetNumPCIeDevices() device_ids = [i for i in range(num_devices)] - - request.node.device_ids = device_ids - request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids] + request.node.pci_ids = device_ids # Get only physical devices devices = ttl.device.CreateDevices(device_ids, **device_params) @@ -129,8 +125,6 @@ def all_devices(request, device_params): num_devices = ttl.device.GetNumAvailableDevices() device_ids = [i for i in range(num_devices)] - - request.node.device_ids = device_ids request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids] # Get only physical devices @@ -155,7 +149,6 @@ def device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device_par except (ValueError, AttributeError): num_devices_requested = len(device_ids) - request.node.device_ids = device_ids[:num_devices_requested] request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids[:num_devices_requested]] device_mesh = ttnn.open_device_mesh( @@ -183,8 +176,7 @@ def pcie_device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, devic except (ValueError, AttributeError): num_pcie_devices_requested = len(device_ids) - request.node.device_ids = device_ids[:num_pcie_devices_requested] - request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids[:num_pcie_devices_requested]] + request.node.pci_ids = device_ids[:num_pcie_devices_requested] device_mesh = ttnn.open_device_mesh( ttnn.DeviceGrid(1, num_pcie_devices_requested), device_ids[:num_pcie_devices_requested], **device_params @@ -213,7 +205,6 @@ def t3k_device_mesh(request, silicon_arch_name, silicon_arch_wormhole_b0, device except (ValueError, AttributeError): num_devices_requested = len(device_ids) - request.node.device_ids = device_ids[:num_devices_requested] request.node.pci_ids = [ttl.device.GetPCIeDeviceID(i) for i in device_ids[:num_devices_requested]] device_mesh = ttnn.open_device_mesh( @@ -334,13 +325,18 @@ def pytest_addoption(parser): ) parser.addoption("--cli-input", action="store", default=None, help="Enter prompt if --input-method=cli") parser.addoption( - "--metal-cleanup", + "--metal-timeout", action="store", default=None, help="Enable process timeout", ) +@pytest.fixture +def 
input_path(request):
+    return request.config.getoption("--input-path")
+
+
 def pytest_generate_tests(metafunc):
     """
     This is not a standard docstring.
@@ -473,14 +469,15 @@ def pytest_runtest_makereport(item, call):
 @pytest.hookimpl(hookwrapper=True)
 def pytest_runtest_teardown(item, nextitem):
     yield
-    metal_cleanup_enabled = item.config.getoption("--metal-cleanup")
-    if metal_cleanup_enabled is not None:
+    metal_timeout_enabled = item.config.getoption("--metal-timeout")
+    using_xdist = int(os.getenv("PYTEST_XDIST_WORKER_COUNT", "0"))
+
+    if metal_timeout_enabled is not None or using_xdist:
         report = item.stash[phase_report_key]
         test_failed = report.get("call", None) and report["call"].failed
         if test_failed:
-            logger.info(f"In custom teardown, open device ids: {item.device_ids} {set(item.pci_ids)}")
-            # reset_tensix(set(item.pci_ids))
-            reset_tensix()
+            logger.info(f"In custom teardown, open device ids: {set(item.pci_ids)}")
+            reset_tensix(set(item.pci_ids))


 # This is overriding the timer setup hook from pytest-timeout
@@ -488,10 +485,12 @@ def pytest_runtest_teardown(item, nextitem):
 # At timeout, the process kills its parent (the test process) and then itself
 @pytest.hookimpl(tryfirst=True)
 def pytest_timeout_set_timer(item, settings):
-    metal_timeout_enabled = item.config.getoption("--metal-cleanup")
-    if metal_timeout_enabled is not None:
+    metal_timeout_enabled = item.config.getoption("--metal-timeout")
+    using_xdist = int(os.getenv("PYTEST_XDIST_WORKER_COUNT", "0"))
+
+    if metal_timeout_enabled is not None or using_xdist:
         parent_pid = os.getpid()
-        logger.info(f"Metal timeout {settings.timeout} seconds")
+        logger.info(f"Metal timeout {settings.timeout} seconds {parent_pid} for {item.nodeid}")

         def get_parent_status():
             try:
@@ -501,12 +500,15 @@ def get_parent_status():
             return parent.status()

         def run_timer(settings):
+            logger.info(f"Timer started for {item.nodeid}")
             dead_status = ["zombie", "dead", "already dead"]
             timeout = settings.timeout
-            while get_parent_status() not in dead_status and timeout > 0:
-                time.sleep(1)
-                timeout -= 1
-            if get_parent_status() != "already dead":
+            parent_status = "running"
+            while parent_status not in dead_status and timeout > 0:
+                time.sleep(5)
+                timeout -= 5
+                parent_status = get_parent_status()
+            if parent_status != "already dead":
                 logger.info(f"Timing out test case")
                 os.kill(parent_pid, signal.SIGKILL)
             logger.info(f"Killing timer")
@@ -519,13 +521,12 @@ def cancel():
     metal_timer = multiprocess.Process(target=run_timer, args=(settings,), daemon=True)
     item.cancel_timeout = cancel
     metal_timer.start()
-    # logger.info(f"parent and metal timer pid: {parent_pid} {metal_timer.pid}")
     return True


 # This is a hook used in pytest-xdist to handle when a worker crashes out
 # In our case, combined with the pytest-timeout thread method, the worker will crash out for a hang and
-# then it should get cleaned up by the controller through this fixture :fingers_crossed:
+# then it should get cleaned up by the controller through this fixture
 @pytest.hookimpl(tryfirst=True)
 def pytest_handlecrashitem(crashitem, report, sched):
     reset_tensix()
@@ -542,10 +543,9 @@ def reset_tensix(tt_open_devices=None):
         smi_reset_result = run_process_and_get_result(f"/opt/tt_metal_infra/scripts/ci/{arch}/reset.sh")
     else:
         tt_open_devices_str = ",".join([str(i) for i in tt_open_devices])
-        check_smi = run_process_and_get_result("tt-smi-metal -h")
-        logger.info(f"Check tt-smi-metal exists: {check_smi.returncode}")
+        check_smi_metal = run_process_and_get_result("tt-smi-metal -h")
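+        # Probe for the tt-smi-metal wrapper; a nonzero return code from the
+        # '-h' probe means it is absent, so we fall back to plain tt-smi below.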
logger.info(f"Running reset for pci devices: {tt_open_devices_str}") - if check_smi.returncode > 0: + if check_smi_metal.returncode > 0: logger.info(f"Test failed - resetting {arch} with tt-smi") smi_reset_result = run_process_and_get_result(f"tt-smi -r {tt_open_devices_str}") else: @@ -555,5 +555,4 @@ def reset_tensix(tt_open_devices=None): @pytest.hookimpl(tryfirst=True) def pytest_xdist_auto_num_workers(config): - logger.info("getting num of xdist workers") return 1 diff --git a/pytest.ini b/pytest.ini index c8f8a206f75..699ef215218 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,7 +1,7 @@ [pytest] timeout = 300 minversion = 7.2 -addopts = --import-mode=importlib -vs -rA +addopts = --import-mode=importlib -vvs -rA --durations=0 empty_parameter_set_mark = skip markers = post_commit: mark tests to run on post-commit diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh index 754bcbc9ab1..91567864538 100755 --- a/tests/scripts/run_performance.sh +++ b/tests/scripts/run_performance.sh @@ -1,6 +1,6 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 @@ -11,19 +11,19 @@ run_perf_models_other() { local tt_arch=$1 local test_marker=$2 - env pytest tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker + env pytest -n auto tests/ttnn/integration_tests/resnet/test_performance.py -m $test_marker - env pytest tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker + env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker - env pytest models/demos/ttnn_falcon7b/tests -m $test_marker + env pytest -n auto models/demos/ttnn_falcon7b/tests -m $test_marker # Separate calls since we can't mix switching between number of cqs - env pytest models/demos/resnet/tests/test_perf_resnet.py -m $test_marker - env pytest models/demos/resnet/tests/test_perf_resnet_2cqs.py -m $test_marker + env pytest -n auto models/demos/resnet/tests/test_perf_resnet.py -m $test_marker + env pytest -n auto models/demos/resnet/tests/test_perf_resnet_2cqs.py -m $test_marker - env pytest tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker + env pytest -n auto tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker - env pytest models/demos/metal_BERT_large_11/tests -m $test_marker + env pytest -n auto models/demos/metal_BERT_large_11/tests -m $test_marker ## Merge all the generated reports env python models/perf/merge_perf_results.py @@ -33,13 +33,13 @@ run_perf_models_llm_javelin() { local tt_arch=$1 local test_marker=$2 - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m $test_marker + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m $test_marker if [ "$tt_arch" == "wormhole_b0" ]; then - env pytest models/demos/mamba/tests -m $test_marker --timeout=360 + env pytest -n auto models/demos/mamba/tests -m $test_marker --timeout=360 fi - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/mistral7b/tests -m $test_marker --timeout=360 + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/wormhole/mistral7b/tests -m $test_marker --timeout=360 ## Merge all the generated reports env python models/perf/merge_perf_results.py @@ -50,7 +50,7 @@ run_perf_models_cnn_javelin() { local test_marker=$2 # Run tests - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest 
tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=480 + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=480 #env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/experimental/functional_unet/tests -m $test_marker ## Merge all the generated reports @@ -58,6 +58,7 @@ run_perf_models_cnn_javelin() { } run_device_perf_models() { + set -eo pipefail local test_marker=$1 env pytest tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=600 diff --git a/tests/scripts/run_tests.sh b/tests/scripts/run_tests.sh index 334b68b71fd..ebd25264b9c 100755 --- a/tests/scripts/run_tests.sh +++ b/tests/scripts/run_tests.sh @@ -81,7 +81,7 @@ run_frequent_api_pipeline_tests() { ./tests/scripts/run_python_api_unit_tests.sh else if [[ $tt_arch == "wormhole_b0" ]]; then - pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k nightly + pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k nightly else echo "API tests are not available for fast dispatch because they're already covered in post-commit" fi diff --git a/tests/scripts/single_card/nightly/run_common_models.sh b/tests/scripts/single_card/nightly/run_common_models.sh index 17ca8c4d3cf..19e090065f3 100755 --- a/tests/scripts/single_card/nightly/run_common_models.sh +++ b/tests/scripts/single_card/nightly/run_common_models.sh @@ -1,12 +1,17 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 exit 1 fi +fail=0 echo "Running common models for archs" -env pytest tests/nightly/common_models/ +env pytest -n auto tests/nightly/common_models/ ; fail+=$? + +if [[ $fail -ne 0 ]]; then + exit 1 +fi diff --git a/tests/scripts/single_card/nightly/run_gs_only.sh b/tests/scripts/single_card/nightly/run_gs_only.sh index c5bcc9f9745..bad5b98ea40 100755 --- a/tests/scripts/single_card/nightly/run_gs_only.sh +++ b/tests/scripts/single_card/nightly/run_gs_only.sh @@ -1,14 +1,19 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 exit 1 fi +fail=0 echo "Running model nightly tests for GS only" -env pytest models/demos/resnet/tests/test_metal_resnet50_performant.py +env pytest -n auto models/demos/resnet/tests/test_metal_resnet50_performant.py ; fail+=$? -env pytest models/demos/resnet/tests/test_metal_resnet50_2cqs_performant.py +env pytest -n auto models/demos/resnet/tests/test_metal_resnet50_2cqs_performant.py ; fail+=$? + +if [[ $fail -ne 0 ]]; then + exit 1 +fi diff --git a/tests/scripts/single_card/nightly/run_ttnn.sh b/tests/scripts/single_card/nightly/run_ttnn.sh index f0bb3f9cadc..a41836173de 100755 --- a/tests/scripts/single_card/nightly/run_ttnn.sh +++ b/tests/scripts/single_card/nightly/run_ttnn.sh @@ -1,12 +1,17 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 exit 1 fi +fail=0 echo "Running ttnn nightly tests for GS only" -env pytest tests/ttnn/integration_tests -m "not models_performance_bare_metal and not models_device_performance_bare_metal" +env pytest -n auto tests/ttnn/integration_tests -m "not models_performance_bare_metal and not models_device_performance_bare_metal" ; fail+=$? 
+ +if [[ $fail -ne 0 ]]; then + exit 1 +fi diff --git a/tests/scripts/single_card/nightly/run_wh_b0_only.sh b/tests/scripts/single_card/nightly/run_wh_b0_only.sh index d30894713c1..5ae9f0657cb 100755 --- a/tests/scripts/single_card/nightly/run_wh_b0_only.sh +++ b/tests/scripts/single_card/nightly/run_wh_b0_only.sh @@ -1,12 +1,17 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 exit 1 fi +fail=0 echo "Running nightly tests for WH B0 only" -env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/nightly/wh_b0_only_eth -env pytest tests/nightly/wh_b0_only \ No newline at end of file +env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/nightly/wh_b0_only_eth ; fail+=$? +env pytest -n auto tests/nightly/wh_b0_only ; fail+=$? + +if [[ $fail -ne 0 ]]; then + exit 1 +fi diff --git a/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh b/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh index 079087d6e69..35895a64208 100755 --- a/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh +++ b/tests/scripts/single_card/nightly/run_wh_b0_unstable.sh @@ -1,12 +1,17 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail if [[ -z "$TT_METAL_HOME" ]]; then echo "Must provide TT_METAL_HOME in environment" 1>&2 exit 1 fi +fail=0 echo "Running unstable nightly tests for WH B0 only" -SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml env pytest tests/ttnn/integration_tests/stable_diffusion +SLOW_MATMULS=1 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml env pytest -n auto tests/ttnn/integration_tests/stable_diffusion ; fail+=$? + +if [[ $fail -ne 0 ]]; then + exit 1 +fi diff --git a/tests/scripts/t3000/run_t3000_demo_tests.sh b/tests/scripts/t3000/run_t3000_demo_tests.sh index 96a05371beb..fa050429ddb 100755 --- a/tests/scripts/t3000/run_t3000_demo_tests.sh +++ b/tests/scripts/t3000/run_t3000_demo_tests.sh @@ -1,23 +1,27 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail run_t3000_falcon40b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon40b_tests" # Falcon40B prefill 60 layer end to end with 10 loops; we need 8x8 grid size - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py --timeout=720 + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_60_layer_t3000_prefill_10_loops.py --timeout=720 ; fail+=$? # Falcon40B end to end demo (prefill + decode) - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_t3000_demo_loops.py ; fail+=$? 
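+
+    # With 'set -eo pipefail' disabled at the top of this script, each pytest
+    # exit status is accumulated into $fail so the remaining tests still run;
+    # the function exits nonzero at the end if any of them failed.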
# Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_falcon40b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_llama3_70b_tests() { @@ -38,39 +42,47 @@ run_t3000_llama3_70b_tests() { run_t3000_falcon7b_tests(){ # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon7b_tests" # Falcon7B demo (perf verification for 128/1024/2048 seq lens and output token verification) - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_128_stochastic_verify] - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_1024_stochastic_verify] - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_2048_stochastic_verify] - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_1024_greedy_verify] + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_128_stochastic_verify] ; fail+=$? + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_1024_stochastic_verify] ; fail+=$? + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-perf_mode_2048_stochastic_verify] ; fail+=$? + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto --disable-warnings -q -s --input-method=json --input-path='models/demos/t3000/falcon7b/input_data_t3000.json' models/demos/t3000/falcon7b/demo_t3000.py::test_demo_multichip[user_input0-8-True-default_mode_1024_greedy_verify] ; fail+=$? # Falcon7B perplexity test (prefill and decode) - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram] --timeout=720 - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-decode_1024_l1_sharded] --timeout=720 + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-prefill_seq1024_dram] --timeout=720 ; fail+=$? 
+ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests/test_perplexity_falcon.py::test_perplexity[True-decode_1024_l1_sharded] --timeout=720 ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_falcon7b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_mixtral_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_mixtral8x7b_tests" # mixtral8x7b 8 chip demo test - 100 token generation with general weights (env flags set inside the test) - pytest models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights] --timeout=720 + pytest -n auto models/demos/t3000/mixtral8x7b/demo/demo.py::test_mixtral8x7b_demo[wormhole_b0-True-general_weights] --timeout=720 ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_mixtral_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_tests() { @@ -87,6 +99,7 @@ run_t3000_tests() { run_t3000_mixtral_tests } +fail=0 main() { # For CI pipeline - source func commands but don't execute tests if not invoked directly if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then @@ -109,6 +122,10 @@ main() { export PYTHONPATH=$TT_METAL_HOME run_t3000_tests + + if [[ $fail -ne 0 ]]; then + exit 1 + fi } main "$@" diff --git a/tests/scripts/t3000/run_t3000_frequent_tests.sh b/tests/scripts/t3000/run_t3000_frequent_tests.sh index 37abf05e64d..cab852813ef 100755 --- a/tests/scripts/t3000/run_t3000_frequent_tests.sh +++ b/tests/scripts/t3000/run_t3000_frequent_tests.sh @@ -1,99 +1,122 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail run_t3000_ethernet_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_ethernet_tests" - pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py - pytest tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py + pytest -n auto tests/tt_metal/microbenchmarks/ethernet/test_ethernet_bidirectional_bandwidth_microbenchmark.py ; fail+=$? + pytest -n auto tests/tt_metal/microbenchmarks/ethernet/test_ethernet_ring_latency_microbenchmark.py ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_ethernet_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_llama2_70b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_llama2_70b_tests" - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py --timeout=900 + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_mlp_t3000.py ; fail+=$? + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_attention_t3000.py ; fail+=$? 
+ env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_decoder_t3000.py ; fail+=$? + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_model_t3000.py --timeout=900 ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_llama2_70b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_mixtral_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_mixtral_tests" # mixtral8x7b 8 chip decode model test (env flags set inside the test) - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc] + pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_model.py::test_mixtral_model_inference[wormhole_b0-True-10-1-pcc] ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_mixtral_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_tteager_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_tteager_tests" - pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit - pytest tests/tt_eager/python_api_testing/unit_testing/misc/test_reduce_scatter_post_commit.py + pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_all_gather.py -k post_commit ; fail+=$? + pytest -n auto tests/tt_eager/python_api_testing/unit_testing/misc/test_reduce_scatter_post_commit.py ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_tteager_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_trace_stress_tests() { + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_trace_stress_tests" - - NUM_TRACE_LOOPS=15 pytest tests/ttnn/unit_tests/test_multi_device_trace.py - NUM_TRACE_LOOPS=15 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/test_multi_device_trace.py + NUM_TRACE_LOOPS=15 pytest -n auto tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$? + NUM_TRACE_LOOPS=15 WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_trace_stress_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_falcon40b_tests() { + fail=0 # Record the start time start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon40b_tests" - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_mlp.py - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_attention.py --timeout=480 - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_decoder.py --timeout=480 - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_falcon_causallm.py --timeout=600 + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_mlp.py ; fail+=$? 
+ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_attention.py --timeout=480 ; fail+=$? + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_decoder.py --timeout=480 ; fail+=$? + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_falcon_causallm.py --timeout=600 ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_falcon40b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_tests() { @@ -103,9 +126,6 @@ run_t3000_tests() { # Run tteager tests run_t3000_tteager_tests - # Run trace tests - run_t3000_trace_stress_tests - # Run falcon40b tests run_t3000_falcon40b_tests @@ -115,8 +135,12 @@ run_t3000_tests() { # Run mixtral tests run_t3000_mixtral_tests + # Run trace tests + run_t3000_trace_stress_tests + } +fail=0 main() { # For CI pipeline - source func commands but don't execute tests if not invoked directly if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then @@ -139,6 +163,10 @@ main() { export PYTHONPATH=$TT_METAL_HOME run_t3000_tests + + if [[ $fail -ne 0 ]]; then + exit 1 + fi } main "$@" diff --git a/tests/scripts/t3000/run_t3000_model_perf_tests.sh b/tests/scripts/t3000/run_t3000_model_perf_tests.sh index 6140b9efeaf..6f97b8e7636 100755 --- a/tests/scripts/t3000/run_t3000_model_perf_tests.sh +++ b/tests/scripts/t3000/run_t3000_model_perf_tests.sh @@ -1,61 +1,77 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail run_t3000_falcon7b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon7b_tests" - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/falcon7b/tests -m "model_perf_t3000" + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/falcon7b/tests -m "model_perf_t3000" ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_falcon7b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_mixtral_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_mixtral_tests" - env pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py -m "model_perf_t3000" + env pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_perf.py -m "model_perf_t3000" ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_mixtral_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_llama2_70b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_llama2_70b_tests" - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000" --timeout=600 + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/llama2_70b/tests/test_llama_perf_decode.py -m "model_perf_t3000" --timeout=600 ; fail+=$? 
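+
+    # Note: --timeout=600 overrides the 300 second default from pytest.ini for
+    # this long-running perf test; the custom timer in conftest.py picks up the
+    # same per-test value.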
# Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_llama2_70b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_falcon40b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon40b_tests" - env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000" --timeout=600 + env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/test_perf_falcon.py -m "model_perf_t3000" --timeout=600 ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_falcon40b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_llm_tests() { @@ -80,6 +96,7 @@ run_t3000_cnn_tests() { env python models/perf/merge_perf_results.py } +fail=0 main() { # For CI pipeline - source func commands but don't execute tests if not invoked directly if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then @@ -129,6 +146,10 @@ main() { echo "$pipeline_type is invalid (supported: [cnn_model_perf_t3000_device, cnn_model_perf_t3000_device])" 2>&1 exit 1 fi + + if [[ $fail -ne 0 ]]; then + exit 1 + fi } main "$@" diff --git a/tests/scripts/t3000/run_t3000_unit_tests.sh b/tests/scripts/t3000/run_t3000_unit_tests.sh index a8019137642..64c23ed2b48 100755 --- a/tests/scripts/t3000/run_t3000_unit_tests.sh +++ b/tests/scripts/t3000/run_t3000_unit_tests.sh @@ -1,66 +1,79 @@ #/bin/bash -set -eo pipefail +# set -eo pipefail run_t3000_ttmetal_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_ttmetal_tests" - TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectSendAllConnectedChips" - TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsSendInterleavedBufferAllConnectedChips" - TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectRingGatherAllChips" - TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsInterleavedRingGatherAllChips" - TT_METAL_ENABLE_REMOTE_CHIP=1 ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueSingleCardFixture.*" - ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueMultiDeviceFixture.*" - ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="DPrintFixture.*:WatcherFixture.*" + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectSendAllConnectedChips" ; fail+=$? + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsSendInterleavedBufferAllConnectedChips" ; fail+=$? + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsDirectRingGatherAllChips" ; fail+=$? + TT_METAL_SLOW_DISPATCH_MODE=1 ./build/test/tt_metal/unit_tests --gtest_filter="DeviceFixture.EthKernelsInterleavedRingGatherAllChips" ; fail+=$? + TT_METAL_ENABLE_REMOTE_CHIP=1 ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueSingleCardFixture.*" ; fail+=$? + ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="CommandQueueMultiDeviceFixture.*" ; fail+=$? 
+ ./build/test/tt_metal/unit_tests_fast_dispatch --gtest_filter="DPrintFixture.*:WatcherFixture.*" ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_ttmetal_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_ttnn_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_ttnn_tests" - pytest tests/ttnn/unit_tests/test_multi_device_trace.py - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/test_multi_device_trace.py - pytest tests/ttnn/unit_tests/test_multi_device.py - pytest tests/ttnn/unit_tests/test_multi_device_async.py + pytest -n auto tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$? + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest tests/ttnn/unit_tests/test_multi_device_trace.py ; fail+=$? + pytest -n auto tests/ttnn/unit_tests/test_multi_device.py ; fail+=$? + pytest -n auto tests/ttnn/unit_tests/test_multi_device_async.py ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_ttnn_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_falcon7b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon7b_tests" - pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_mlp.py - pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_attention.py - pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_decoder.py + pytest -n auto models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_mlp.py ; fail+=$? + pytest -n auto models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_attention.py ; fail+=$? + pytest -n auto models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_decoder.py ; fail+=$? #pytest models/demos/ttnn_falcon7b/tests/multi_chip/test_falcon_causallm.py # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_falcon7b_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_falcon40b_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_falcon40b_tests" - WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_1_layer_t3000.py + WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest -n auto models/demos/t3000/falcon40b/tests/ci/test_falcon_end_to_end_1_layer_t3000.py ; fail+=$? # Record the end time end_time=$(date +%s) @@ -70,21 +83,25 @@ run_t3000_falcon40b_tests() { run_t3000_mixtral_tests() { # Record the start time + fail=0 start_time=$(date +%s) echo "LOG_METAL: Running run_t3000_mixtral_tests" - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_attention.py - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_mlp.py - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_rms_norm.py - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py - pytest models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py + pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_attention.py ; fail+=$? + pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_mlp.py ; fail+=$? + pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_rms_norm.py ; fail+=$? 
+ pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_embedding.py ; fail+=$? + pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_moe.py ; fail+=$? + pytest -n auto models/demos/t3000/mixtral8x7b/tests/test_mixtral_decoder.py ; fail+=$? # Record the end time end_time=$(date +%s) duration=$((end_time - start_time)) echo "LOG_METAL: run_t3000_mixtral_tests $duration seconds to complete" + if [[ $fail -ne 0 ]]; then + exit 1 + fi } run_t3000_tests() { @@ -104,6 +121,7 @@ run_t3000_tests() { run_t3000_mixtral_tests } +fail=0 main() { # For CI pipeline - source func commands but don't execute tests if not invoked directly if [[ "${BASH_SOURCE[0]}" != "${0}" ]]; then @@ -126,6 +144,10 @@ main() { export PYTHONPATH=$TT_METAL_HOME run_t3000_tests + + if [[ $fail -ne 0 ]]; then + exit 1 + fi } main "$@" diff --git a/tt_metal/python_env/requirements-dev.txt b/tt_metal/python_env/requirements-dev.txt index f7f90202919..5a6cf7ebb88 100644 --- a/tt_metal/python_env/requirements-dev.txt +++ b/tt_metal/python_env/requirements-dev.txt @@ -21,6 +21,7 @@ mypy==1.9.0 pytest==7.2.2 pytest-timeout==2.2.0 pytest-split==0.8.2 +pytest-xdist==3.6.1 jsbeautifier==1.14.7 datasets==2.9.0 torch==2.2.1.0+cpu
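
--
Usage sketch (illustrative only, not part of the applied diff; the test paths
below are placeholders). With the conftest.py hooks above, the watchdog and
reset-on-failure behaviour is keyed off PYTEST_XDIST_WORKER_COUNT, which
pytest-xdist exports to its workers:

  # xdist run: the per-test timeout and tensix reset after a failed test
  # are enabled by default
  pytest -n auto tests/ttnn/integration_tests

  # serial run: opt in explicitly; any value passed to --metal-timeout enables it
  pytest --metal-timeout 1 tests/ttnn/integration_tests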