diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index e3e13f5ecbc..a1275b2e085 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -56,11 +56,4 @@ jobs:
       - name: Test with pytest (in series)
         working-directory: tests
         run: |
-          pytest onnxruntime -m "run_in_series" --durations=0 -vvvv -s
-
-      - name: Test with pytest (in parallel)
-        env:
-          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-        working-directory: tests
-        run: |
-          pytest onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto
+          pytest onnxruntime -k test_compare_to_transformers_ort
diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
index 9187b851fc0..f02d9eca5e8 100644
--- a/tests/onnxruntime/test_modeling.py
+++ b/tests/onnxruntime/test_modeling.py
@@ -2315,18 +2315,8 @@ def test_compare_to_io_binding(self, model_arch):
 
 class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
     SUPPORTED_ARCHITECTURES = [
-        "bloom",
-        "codegen",
-        "falcon",
-        "gpt2",
-        "gpt_bigcode",
-        "gpt_neo",
-        "gpt_neox",
-        "gptj",
-        "llama",
-        "mistral",
+
         "mpt",
-        "opt",
     ]
 
     if check_if_transformers_greater("4.37"):
@@ -2420,7 +2410,7 @@ def test_merge_from_onnx_and_save(self, model_arch):
             self.assertNotIn(ONNX_WEIGHTS_NAME, folder_contents)
 
     @parameterized.expand(grid_parameters({**FULL_GRID, "num_beams": [1, 4]}))
-    def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cache: bool, num_beams: int):
+    def test_compare_to_transformers_ort(self, test_name: str, model_arch: str, use_cache: bool, num_beams: int):
         use_io_binding = None
         if use_cache is False:
             use_io_binding = False