Parallel tests (#4960)
* Improve coverage config

* Rearrange tests to speed them up.

* Run TPU and GPU tests serially.

* Automate sharding
stgpetrovic authored May 2, 2023
1 parent 17d426d commit 37bfcd8
Showing 10 changed files with 12,206 additions and 12,088 deletions.
4 changes: 4 additions & 0 deletions .bazelrc
@@ -71,6 +71,10 @@ build:short_logs --output_filter=DONT_MATCH_ANYTHING
#build:tpu --@org_tensorflow//tensorflow/compiler/xla/python:enable_tpu=true
build:tpu --define=with_tpu_support=true

# Run tests serially with TPU and GPU (only 1 device is available).
test:tpu --local_test_jobs=1
test:cuda --local_test_jobs=1

#########################################################################
# RBE config options below.
# Flag to enable remote config
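A quick usage sketch (the target pattern is illustrative, not taken from this commit): because --config=tpu applies both the build:tpu and test:tpu settings, a TPU or CUDA test run is capped at one test at a time, while a plain CPU run keeps Bazel's default test parallelism.

    # Hypothetical invocations; substitute real test targets for //test/cpp/...
    bazel test --config=tpu //test/cpp/...   # serial: --local_test_jobs=1 applies
    bazel test //test/cpp/...                # CPU: tests may run in parallel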
104 changes: 36 additions & 68 deletions test/cpp/BUILD
@@ -46,56 +46,39 @@ ptxla_cc_library(
],
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_ir",
srcs = ["test_ir.cpp"],
deps = [
":cpp_test_util",
":torch_xla_test",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
],
alwayslink = True,
)

ptxla_cc_library(
name = "test_aten_xla_tensor",
srcs = ["test_aten_xla_tensor.cpp"],
deps = [
":cpp_test_util",
":torch_xla_test",
"//third_party/xla_client:metrics",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@org_tensorflow//tensorflow/compiler/xla:permutation_util",
"@com_google_googletest//:gtest_main",
],
alwayslink = True,
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_lazy",
srcs = ["test_lazy.cpp"],
deps = [
":torch_xla_test",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
"@org_tensorflow//tensorflow/compiler/xla:shape_util",
],
alwayslink = True,
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_op_by_op_executor",
srcs = ["test_op_by_op_executor.cpp"],
deps = [
":cpp_test_util",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
alwayslink = True,
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_replication",
srcs = ["test_replication.cpp"],
deps = [
@@ -106,48 +89,37 @@ ptxla_cc_library(
"//third_party/xla_client:multi_wait",
"//third_party/xla_client:thread_pool",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
"@org_tensorflow//tensorflow/compiler/xla:shape_util",
"@org_tensorflow//tensorflow/compiler/xla/client:xla_builder",
],
alwayslink = True,
)

ptxla_cc_library(
name = "test_symint",
srcs = ["test_symint.cpp"],
deps = [
":cpp_test_util",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
],
alwayslink = True,
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_tensor",
srcs = ["test_tensor.cpp"],
srcs = [
"test_symint.cpp",
"test_tensor.cpp",
],
deps = [
":cpp_test_util",
":torch_xla_test",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
alwayslink = True,
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_xla_backend_intf",
srcs = ["test_xla_backend_intf.cpp"],
deps = [
":cpp_test_util",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
],
alwayslink = True,
)

ptxla_cc_library(
ptxla_cc_test(
name = "test_xla_sharding",
srcs = ["test_xla_sharding.cpp"],
deps = [
@@ -156,30 +128,26 @@ ptxla_cc_library(
"//third_party/xla_client:env_vars",
"//third_party/xla_client:sys_util",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest",
"@com_google_googletest//:gtest_main",
"@org_tensorflow//tensorflow/compiler/xla:xla_data_proto_cc",
],
alwayslink = True,
)

# Keep all the tests in the same binary as long as XRT is supported.
# Once PjRT is the only runtime, make each test library a test.
# The reason for this is that we want to run tests in parallel
# but XRT gRPC ports cannot be reused and are inherited from the environment.
ptxla_cc_test(
name = "main",
size = "enormous",
srcs = ["main.cpp"],
deps = [
":test_aten_xla_tensor",
":test_ir",
":test_lazy",
":test_op_by_op_executor",
":test_replication",
":test_symint",
":test_tensor",
":test_xla_backend_intf",
":test_xla_sharding",
"@com_google_googletest//:gtest_main",
],
)
# This test is very large, so it's split into shards.
# To keep it fast, please add new shards when needed.
[
ptxla_cc_test(
name = test[:-len(".cpp")],
size = "enormous",
srcs = [test],
deps = [
":cpp_test_util",
":torch_xla_test",
"//third_party/xla_client:metrics",
"//torch_xla/csrc:tensor",
"@com_google_googletest//:gtest_main",
"@org_tensorflow//tensorflow/compiler/xla:permutation_util",
],
)
for test in glob(["test_aten_xla_tensor*cpp"])
]
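For illustration, the list comprehension above defines one ptxla_cc_test per file matched by glob(["test_aten_xla_tensor*cpp"]), naming each target after its source file minus the .cpp suffix, so the shards can run in parallel and a new shard needs no BUILD change. The shard names below are assumptions based on the glob pattern, not taken from this diff.

    # Hypothetical expansion of the comprehension for two shard files:
    #   test_aten_xla_tensor_1.cpp -> //test/cpp:test_aten_xla_tensor_1
    #   test_aten_xla_tensor_2.cpp -> //test/cpp:test_aten_xla_tensor_2
    # Run a single shard:
    bazel test //test/cpp:test_aten_xla_tensor_1
    # Or run every test in the package (in parallel on CPU):
    bazel test //test/cpp:all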