From 4d8c0c861f3c04685b08c397e69d07014a24b715 Mon Sep 17 00:00:00 2001 From: Manfei <41607353+ManfeiBai@users.noreply.github.com> Date: Mon, 4 Dec 2023 19:55:28 -0500 Subject: [PATCH] [SPMD] Add debug of SPMD for single/multi host (#5742) --- test/run_tests.sh | 1 + test/spmd/test_spmd_debugging.py | 809 ++++++++++++++++++++++++ test/tpu/xla_test_job.yaml | 2 + torch_xla/distributed/spmd/__init__.py | 17 +- torch_xla/distributed/spmd/debugging.py | 166 +++++ 5 files changed, 991 insertions(+), 4 deletions(-) create mode 100644 test/spmd/test_spmd_debugging.py create mode 100644 torch_xla/distributed/spmd/debugging.py diff --git a/test/run_tests.sh b/test/run_tests.sh index 453abb5e4692..c3fd72572592 100755 --- a/test/run_tests.sh +++ b/test/run_tests.sh @@ -203,6 +203,7 @@ function run_xla_op_tests3 { run_test "$CDIR/spmd/test_xla_sharding_hlo.py" run_test "$CDIR/spmd/test_xla_virtual_device.py" run_test "$CDIR/spmd/test_dynamo_spmd.py" + run_test "$CDIR/spmd/test_spmd_debugging.py" run_test "$CDIR/spmd/test_xla_distributed_checkpoint.py" run_test "$CDIR/spmd/test_xla_spmd_python_api_interaction.py" run_test "$CDIR/test_operations_hlo.py" "$@" --verbosity=$VERBOSITY diff --git a/test/spmd/test_spmd_debugging.py b/test/spmd/test_spmd_debugging.py new file mode 100644 index 000000000000..02139c55a5a2 --- /dev/null +++ b/test/spmd/test_spmd_debugging.py @@ -0,0 +1,809 @@ +import sys + +import unittest +from unittest.mock import patch +import math +import numpy as np +import os +import io +import rich + +import torch +import torch_xla +import torch_xla.runtime as xr +import torch_xla.utils.utils as xu +import torch_xla.core.xla_env_vars as xenv +import torch_xla.core.xla_model as xm +import torch_xla.distributed.spmd as xs +from torch_xla.distributed.spmd import XLAShardedTensor +from torch_xla.distributed.spmd import Mesh + +import test_xla_sharding_base + + +class DebuggingSpmdTest(test_xla_sharding_base.XlaShardingTest): + + @classmethod + def setUpClass(cls): + xr.use_spmd() + super().setUpClass() + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'CPU'), + f"Requires PJRT_DEVICE set to `TPU`.") + def test_debugging_spmd_single_host_tiled_tpu(self): + from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding + device = xm.xla_device() + num_devices = self.n_devices + mesh_shape = (2, num_devices // 2) + device_ids = np.array(range(num_devices)) + mesh = self._get_mesh(mesh_shape) + t = torch.randn(8, 4, device=device) + partition_spec = (0, 1) + xs.mark_sharding(t, mesh, partition_spec) + sharding = torch_xla._XLAC._get_xla_sharding_spec(t) + generated_table = visualize_tensor_sharding(t) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU 0', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 1', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 2', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + 
col.append( + rich.padding.Padding( + rich.align.Align('TPU 3', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU 4', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 5', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 6', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 7', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'CPU'), + f"Requires PJRT_DEVICE set to `TPU`.") + def test_single_host_partial_replication_tpu(self): + from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding + device = xm.xla_device() + num_devices = self.n_devices + mesh_shape = (2, num_devices // 2) + device_ids = np.array(range(num_devices)) + mesh = self._get_mesh(mesh_shape) + + partition_spec = (0, None) + t = torch.randn(8, 32, device=device) + xs.mark_sharding(t, mesh, (0, None)) + sharding = torch_xla._XLAC._get_xla_sharding_spec(t) + generated_table = visualize_tensor_sharding(t) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [0, 1, 2, 3]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [4, 5, 6, 7]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'CPU'), + f"Requires PJRT_DEVICE set to `TPU`.") + def test_single_host_replicated_tpu(self): + from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding + device = xm.xla_device() + num_devices = self.n_devices + mesh_shape = (2, num_devices // 2) + device_ids = np.array(range(num_devices)) + mesh = self._get_mesh(mesh_shape) + + partition_spec_replicated = (None, None) + t = torch.randn(8, 32, device=device) + xs.mark_sharding(t, mesh, partition_spec_replicated) + sharding = torch_xla._XLAC._get_xla_sharding_spec(t) + generated_table = visualize_tensor_sharding(t) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = 
capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align( + 'TPU [0, 1, 2, 3, 4, 5, 6, 7]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'TPU'), + f"Requires PJRT_DEVICE set to `CPU`.") + def test_debugging_spmd_single_host_tiled_cpu(self): + from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding + device = xm.xla_device() + num_devices = self.n_devices + mesh_shape = (1, num_devices) + device_ids = np.array(range(num_devices)) + mesh = self._get_mesh(mesh_shape) + t = torch.randn(8, 4, device=device) + partition_spec = (0, 1) + xs.mark_sharding(t, mesh, partition_spec) + sharding = torch_xla._XLAC._get_xla_sharding_spec(t) + generated_table = visualize_tensor_sharding(t) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [0]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'TPU'), + f"Requires PJRT_DEVICE set to `CPU`.") + def test_single_host_partial_replication_cpu(self): + from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding + device = xm.xla_device() + num_devices = self.n_devices + mesh_shape = (1, num_devices) + device_ids = np.array(range(num_devices)) + mesh = self._get_mesh(mesh_shape) + + partition_spec = (0, None) + t = torch.randn(8, 32, device=device) + xs.mark_sharding(t, mesh, (0, None)) + sharding = torch_xla._XLAC._get_xla_sharding_spec(t) + generated_table = visualize_tensor_sharding(t) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [0]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'TPU'), + f"Requires PJRT_DEVICE set to `CPU`.") + 
def test_single_host_replicated_cpu(self): + from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding + device = xm.xla_device() + num_devices = self.n_devices + mesh_shape = (1, num_devices) + device_ids = np.array(range(num_devices)) + mesh = self._get_mesh(mesh_shape) + + partition_spec_replicated = (None, None) + t = torch.randn(8, 32, device=device) + xs.mark_sharding(t, mesh, partition_spec_replicated) + sharding = torch_xla._XLAC._get_xla_sharding_spec(t) + generated_table = visualize_tensor_sharding(t) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [0]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + +# Multi-host tests +# e.g.: sharding={devices=[2,8]0,4,8,12,2,6,10,14,1,5,9,13,3,7,11,15} +# e.g.: sharding={devices=[8,1,2]0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15 last_tile_dim_replicate} +# e.g.: sharding={replicated} + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'CPU'), + f"Requires PJRT_DEVICE set to `TPU`.") + def test_debugging_spmd_multi_host_tiled_tpu(self): + from torch_xla.distributed.spmd.debugging import visualize_sharding + sharding = '{devices=[2,8]0,4,8,12,2,6,10,14,1,5,9,13,3,7,11,15}' + generated_table = visualize_sharding(sharding) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU 0', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 4', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 8', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 12', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 2', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 6', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 10', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 14', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, 
color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU 1', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 5', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 9', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 13', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 3', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 7', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 11', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('TPU 15', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'CPU'), + f"Requires PJRT_DEVICE set to `TPU`.") + def test_multi_host_partial_replication_tpu(self): + from torch_xla.distributed.spmd.debugging import visualize_sharding + sharding = '{devices=[8,1,2]0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15 last_tile_dim_replicate}' + generated_table = visualize_sharding(sharding) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [0, 1]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [4, 5]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [8, 9]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [12, 13]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [2, 3]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [6, 7]', 
"center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [10, 11]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('TPU [14, 15]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'CPU'), + f"Requires PJRT_DEVICE set to `TPU`.") + def test_multi_host_replicated_tpu(self): + from torch_xla.distributed.spmd.debugging import visualize_sharding + sharding = '{replicated}' + generated_table = visualize_sharding(sharding) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align( + 'TPU [0, 1, 2, 3, 4, 5, 6, 7]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'TPU'), + f"Requires PJRT_DEVICE set to `CPU`.") + def test_debugging_spmd_multi_host_tiled_cpu(self): + from torch_xla.distributed.spmd.debugging import visualize_sharding + sharding = '{devices=[2,8]0,4,8,12,2,6,10,14,1,5,9,13,3,7,11,15}' + generated_table = visualize_sharding(sharding) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU 0', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 4', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 8', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 12', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 2', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 6', "center", vertical="middle"), + (1, 1, 1, 1), + 
style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 10', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 14', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU 1', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 5', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 9', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 13', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 3', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 7', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 11', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + col.append( + rich.padding.Padding( + rich.align.Align('CPU 15', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'TPU'), + f"Requires PJRT_DEVICE set to `CPU`.") + def test_multi_host_partial_replication_cpu(self): + from torch_xla.distributed.spmd.debugging import visualize_sharding + sharding = '{devices=[8,1,2]0,1,4,5,8,9,12,13,2,3,6,7,10,11,14,15 last_tile_dim_replicate}' + generated_table = visualize_sharding(sharding) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [0, 1]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [4, 5]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [8, 9]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [12, 13]', "center", vertical="middle"), + (1, 1, 1, 1), + 
style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [2, 3]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [6, 7]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [10, 11]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [14, 15]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + + @unittest.skipIf( + not xr.using_pjrt() or + xu.getenv_as(xenv.PJRT_DEVICE, str) in ("GPU", 'CUDA', 'ROCM', 'TPU'), + f"Requires PJRT_DEVICE set to `CPU`.") + def test_multi_host_replicated_cpu(self): + from torch_xla.distributed.spmd.debugging import visualize_sharding + sharding = '{replicated}' + generated_table = visualize_sharding(sharding) + console = rich.console.Console() + with console.capture() as capture: + console.print(generated_table) + output = capture.get() + + color = None + text_color = None + fake_table = rich.table.Table( + show_header=False, + show_lines=True, + padding=0, + highlight=True, + pad_edge=False, + box=rich.box.SQUARE) + col = [] + col.append( + rich.padding.Padding( + rich.align.Align('CPU [0]', "center", vertical="middle"), + (1, 1, 1, 1), + style=rich.style.Style(bgcolor=color, color=text_color))) + fake_table.add_row(*col) + fake_console = rich.console.Console() + with fake_console.capture() as fake_capture: + fake_console.print(fake_table) + fake_output = fake_capture.get() + assert output == fake_output + +if __name__ == '__main__': + test = unittest.main() + sys.exit(0 if test.result.wasSuccessful() else 1) diff --git a/test/tpu/xla_test_job.yaml b/test/tpu/xla_test_job.yaml index 99d59d286dcd..e727953ddc43 100644 --- a/test/tpu/xla_test_job.yaml +++ b/test/tpu/xla_test_job.yaml @@ -42,6 +42,7 @@ spec: - -cxe - | pip install expecttest + pip install rich python3 /src/pytorch/xla/test/test_operations.py -v python3 /src/pytorch/xla/test/pjrt/test_runtime_tpu.py @@ -55,6 +56,7 @@ spec: XLA_EXPERIMENTAL=nonzero:masked_select python3 /src/pytorch/xla/test/ds/test_dynamic_shapes.py -v python3 /src/pytorch/xla/test/test_autocast.py python3 /src/pytorch/xla/test/dynamo/test_dynamo.py + python3 /src/pytorch/xla/test/spmd/test_spmd_debugging.py volumeMounts: - mountPath: /dev/shm name: dshm diff --git a/torch_xla/distributed/spmd/__init__.py b/torch_xla/distributed/spmd/__init__.py index 3cd50e1e7c05..7f494b74c9d3 100644 --- a/torch_xla/distributed/spmd/__init__.py +++ b/torch_xla/distributed/spmd/__init__.py @@ -5,8 +5,17 @@ from .api import xla_distribute_tensor, xla_distribute_module __all__ = [ - "XLAShard", "XLAShardedTensor", "Mesh", "HybridMesh", "ShardingType", - "ShardingSpec", "XLAPatchedLinear", "mark_sharding", "clear_sharding", - "wrap_if_sharded", "xla_distribute_tensor", "xla_distribute_module", - "xla_patched_nn_linear_forward" 
+ "XLAShard", + "XLAShardedTensor", + "Mesh", + "HybridMesh", + "ShardingType", + "ShardingSpec", + "XLAPatchedLinear", + "mark_sharding", + "clear_sharding", + "wrap_if_sharded", + "xla_distribute_tensor", + "xla_distribute_module", + "xla_patched_nn_linear_forward", ] diff --git a/torch_xla/distributed/spmd/debugging.py b/torch_xla/distributed/spmd/debugging.py new file mode 100644 index 000000000000..508d8cbb371c --- /dev/null +++ b/torch_xla/distributed/spmd/debugging.py @@ -0,0 +1,166 @@ +from collections.abc import Sequence +import functools +import string +import sys +from typing import Any, Callable, Optional, Union +import weakref + +import numpy as np +import torch +import torch_xla +import torch_xla.core.xla_model as xm +import torch_xla.runtime as xr +from torch_xla.distributed.spmd.xla_sharding import * +import torch_xla.utils.utils as xu +import torch_xla.core.xla_env_vars as xenv +from torch_xla.distributed.spmd import XLAShardedTensor + +try: + import rich + import rich.align + import rich.box + import rich.console + import rich.padding + import rich.style + import rich.table + RICH_ENABLED = True +except: + RICH_ENABLED = False + + +def visualize_sharding(sharding: str, + use_color: bool = True, + scale: float = 1., + min_width: int = 9, + max_width: int = 80): + """Visualizes a ``Sharding`` using ``rich``. + Args: + sharding (`str`): sharding of given tensor with SPMD + use_color (`bool`): whether use color or not + scale (`float`): scale of table visualized in console + min_width (`int`): min width used to setup table to visualize + max_width (`int`): max width used to setup table to visualize + Returns: + table to visualize given tensor sharding. This function + will also visualize the sharding of the tensor without as return. + """ + + if not RICH_ENABLED: + raise ValueError("`visualize_sharding` requires `rich` to be installed.") + + slices: dict[tuple[int, ...], set[int]] = {} + heights: dict[tuple[int, ...], Optional[float]] = {} + widths: dict[tuple[int, ...], float] = {} + + if len(sharding) >= 0: + # sharding is longer than 0 + # eg: '{devices=[2,2]0,1,2,3}' + # eg: '{replicated}' + # eg: '{devices=[2,1,2]0,1,2,3 last_tile_dim_replicate}' + if sharding == '{replicated}' or len(sharding) == 0: + heights = 1 + widths = 1 + num_devices = xr.global_runtime_device_count() + device_ids = list(range(num_devices)) + slices.setdefault((0, 0), device_ids) + else: + sharding_spac = sharding[sharding.index('['):sharding.index(']') + 1] + device_list_original = sharding.split(' last_tile_dim_replicate') + if len(device_list_original) == 2 and device_list_original[1] == '}': + try: + device_list_original_first = device_list_original[0] + device_list = device_list_original_first[device_list_original_first. 
+ index(']') + 1:] + device_indices_map = [int(s) for s in device_list.split(',')] + heights = int(sharding_spac[1]) + widths = int(sharding_spac[3]) + last_dim_depth = int(sharding_spac[5]) + devices_len = len(device_indices_map) + len_after_dim_down = devices_len // last_dim_depth + for i in range(len_after_dim_down): + slices.setdefault( + (i // widths, i % widths), + device_indices_map[i * last_dim_depth:(i + 1) * last_dim_depth]) + except: + raise ValueError("sharding ", sharding, + " is not organized as expected") + else: + # eg: '{devices=[2,2]0,1,2,3}' + try: + assert device_list_original[0][-1] == '}' + except: + raise ValueError("sharding ", sharding, + " is not organized as expected") + try: + device_list_original_first = device_list_original[0] + device_list = device_list_original_first[device_list_original_first. + index(']') + 1:-1] + device_indices_map = [int(i) for i in device_list.split(',')] + heights = int(sharding_spac[1]) + widths = int(sharding_spac[3]) + devices_len = len(device_indices_map) + for i in range(devices_len): + slices.setdefault((i // widths, i % widths), device_indices_map[i]) + except: + raise ValueError("sharding ", sharding, + " is not organized as expected") + else: + raise ValueError("sharding length should >= 0") + + num_rows = heights + num_cols = widths + + console = rich.console.Console(width=max_width) + use_color = use_color and console.color_system is not None + + base_height = int(3 * scale) + aspect_ratio = 1 + base_width = int(base_height * aspect_ratio) + height_to_width_ratio = 1.5 + + pjrt_device = xu.getenv_as(xenv.PJRT_DEVICE, str) + device_kind = pjrt_device + + table = rich.table.Table( + show_header=False, + show_lines=not use_color, + padding=0, + highlight=not use_color, + pad_edge=False, + box=rich.box.SQUARE if not use_color else None) + for i in range(num_rows): + col = [] + for j in range(num_cols): + entry = f"{device_kind} " + str(slices[i, j]) + width, maybe_height = widths, heights + width = int(width * base_width * height_to_width_ratio) + if maybe_height is None: + height = 1 + else: + height = int(maybe_height * base_height) + width = min(max(width, min_width), max_width) + + color = None + text_color = None + + padding = (1, 1, 1, 1) + + col.append( + rich.padding.Padding( + rich.align.Align(entry, "center", vertical="middle"), + padding, + style=rich.style.Style(bgcolor=color, color=text_color))) + table.add_row(*col) + console.print(table, end='\n\n') + return table + + +def visualize_tensor_sharding(t, **kwargs): + """Visualizes an array's sharding.""" + + # XLAShardedTensor is-a torch.Tensor + def maybe_unwrap(t: torch.Tensor) -> torch.Tensor: + return t.global_tensor if isinstance(t, XLAShardedTensor) else t + + sharding = torch_xla._XLAC._get_xla_sharding_spec(maybe_unwrap(t)) + return visualize_sharding(sharding, **kwargs)
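
A minimal usage sketch of the new helper, mirroring the single-host tiled
TPU test above. It assumes an SPMD-enabled PJRT TPU runtime with an even
number of devices; the mesh shape and tensor size are illustrative only.

    import numpy as np
    import torch
    import torch_xla.core.xla_model as xm
    import torch_xla.runtime as xr
    import torch_xla.distributed.spmd as xs
    from torch_xla.distributed.spmd.debugging import visualize_tensor_sharding

    xr.use_spmd()
    device = xm.xla_device()

    # Arrange all addressable devices in a 2 x (N // 2) logical mesh.
    num_devices = xr.global_runtime_device_count()
    mesh = xs.Mesh(np.array(range(num_devices)), (2, num_devices // 2))

    # Tile both tensor dimensions across the two mesh axes.
    t = torch.randn(8, 4, device=device)
    xs.mark_sharding(t, mesh, (0, 1))

    # Prints a rich table mapping each shard to its device, e.g. a 2 x 4
    # grid of cells labeled 'TPU 0' ... 'TPU 7' on a host with 8 devices.
    visualize_tensor_sharding(t)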