From 91ac965ad96e9b8b3ffab2af542b49b10fa06759 Mon Sep 17 00:00:00 2001 From: Xiongfei Wei Date: Tue, 29 Aug 2023 17:12:38 +0000 Subject: [PATCH 1/6] use manfei's change --- .circleci/common.sh | 3 +++ openxla_patches/service.diff | 14 ++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 openxla_patches/service.diff diff --git a/.circleci/common.sh b/.circleci/common.sh index 1642480a6b4..8983eb757a9 100755 --- a/.circleci/common.sh +++ b/.circleci/common.sh @@ -154,6 +154,9 @@ function run_torch_xla_python_tests() { # PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 # PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --auto_wrap_policy type_based --use_small_fake_sample --num_epochs=1 # XLA_DISABLE_FUNCTIONALIZATION=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 + PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 + PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --auto_wrap_policy type_based --use_small_fake_sample --num_epochs=1 + XLA_DISABLE_FUNCTIONALIZATION=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 # Syncfree SGD optimizer tests if [ -d ./torch_xla/amp/syncfree ]; then echo "Running Syncfree Optimizer Test" diff --git a/openxla_patches/service.diff b/openxla_patches/service.diff new file mode 100644 index 00000000000..93ebde3bf8e --- /dev/null +++ b/openxla_patches/service.diff @@ -0,0 +1,14 @@ +delete this patch after openxla pin updated to date after or equal to 08/09 +diff --git a/xla/service/layout_assignment.cc b/xla/service/layout_assignment.cc +index f69224f42..43feda2a0 100644 +--- a/xla/service/layout_assignment.cc ++++ b/xla/service/layout_assignment.cc +@@ -154,7 +154,7 @@ OperandLayoutConstraint::OperandLayoutConstraint( + instruction_(instruction), + operand_no_(operand_no) { + CHECK(shape_layout.LayoutIsSet()); +- CHECK(ShapeUtil::CompatibleIgnoringElementType( ++ CHECK(ShapeUtil::CompatibleKind( + shape_layout.shape(), instruction->operand(operand_no)->shape())) + << shape_layout.shape() << " is not compatible with " + << instruction->operand(operand_no)->shape() << " (for operand " From 2b8851e75c3ea3ce358fa47aeb75046aa41c39a6 Mon Sep 17 00:00:00 2001 From: Xiongfei Wei Date: Tue, 29 Aug 2023 17:55:49 +0000 Subject: [PATCH 2/6] use the new flag --- torch_xla/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/torch_xla/__init__.py b/torch_xla/__init__.py index cfde8456617..b9810fd4dd7 100644 --- a/torch_xla/__init__.py +++ b/torch_xla/__init__.py @@ -27,6 +27,7 @@ def _setup_xla_flags(): flags, (('xla_gpu_simplify_all_fp_conversions', 'false'),)) flags = _set_missing_flags(flags, (('xla_gpu_force_compilation_parallelism', '8'),)) + flags = _set_missing_flags(flags, (('xla_gpu_copy_insertion_use_region_analysis', 'false'),)) os.environ['XLA_FLAGS'] = ' '.join(flags) From 7e4d240906814d78cd6fad6e7672379d47c6926d Mon Sep 17 00:00:00 2001 From: iefgnoix Date: Wed, 4 Oct 2023 14:47:45 -0700 Subject: [PATCH 3/6] reduce data size --- openxla_patches/service.diff | 14 -------------- test/test_train_mp_imagenet_fsdp.py | 2 +- torch_xla/__init__.py | 1 - 3 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 openxla_patches/service.diff diff --git a/openxla_patches/service.diff b/openxla_patches/service.diff deleted file mode 100644 index 93ebde3bf8e..00000000000 --- a/openxla_patches/service.diff +++ /dev/null @@ -1,14 +0,0 @@ -delete this patch after openxla pin updated to date after or equal to 08/09 -diff --git a/xla/service/layout_assignment.cc b/xla/service/layout_assignment.cc -index f69224f42..43feda2a0 100644 ---- a/xla/service/layout_assignment.cc -+++ b/xla/service/layout_assignment.cc -@@ -154,7 +154,7 @@ OperandLayoutConstraint::OperandLayoutConstraint( - instruction_(instruction), - operand_no_(operand_no) { - CHECK(shape_layout.LayoutIsSet()); -- CHECK(ShapeUtil::CompatibleIgnoringElementType( -+ CHECK(ShapeUtil::CompatibleKind( - shape_layout.shape(), instruction->operand(operand_no)->shape())) - << shape_layout.shape() << " is not compatible with " - << instruction->operand(operand_no)->shape() << " (for operand " diff --git a/test/test_train_mp_imagenet_fsdp.py b/test/test_train_mp_imagenet_fsdp.py index a40f3bef74f..494dc07edd3 100644 --- a/test/test_train_mp_imagenet_fsdp.py +++ b/test/test_train_mp_imagenet_fsdp.py @@ -167,7 +167,7 @@ def train_imagenet(): img_dim = get_model_property('img_dim') if FLAGS.fake_data: use_small_fake_sample = FLAGS.use_small_fake_sample - train_dataset_len = 50000 if use_small_fake_sample else 1200000 # Roughly the size of Imagenet dataset. + train_dataset_len = 5000 if use_small_fake_sample else 1200000 # Roughly the size of Imagenet dataset. train_loader = xu.SampleGenerator( data=(torch.zeros(FLAGS.batch_size, 3, img_dim, img_dim), torch.zeros(FLAGS.batch_size, dtype=torch.int64)), diff --git a/torch_xla/__init__.py b/torch_xla/__init__.py index b9810fd4dd7..cfde8456617 100644 --- a/torch_xla/__init__.py +++ b/torch_xla/__init__.py @@ -27,7 +27,6 @@ def _setup_xla_flags(): flags, (('xla_gpu_simplify_all_fp_conversions', 'false'),)) flags = _set_missing_flags(flags, (('xla_gpu_force_compilation_parallelism', '8'),)) - flags = _set_missing_flags(flags, (('xla_gpu_copy_insertion_use_region_analysis', 'false'),)) os.environ['XLA_FLAGS'] = ' '.join(flags) From 25af5a15e0a21335ee08246905bd95b3b692e1d9 Mon Sep 17 00:00:00 2001 From: iefgnoix Date: Wed, 4 Oct 2023 16:24:53 -0700 Subject: [PATCH 4/6] reduce batch size --- test/test_train_mp_imagenet_fsdp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_train_mp_imagenet_fsdp.py b/test/test_train_mp_imagenet_fsdp.py index 494dc07edd3..aba3f5773e0 100644 --- a/test/test_train_mp_imagenet_fsdp.py +++ b/test/test_train_mp_imagenet_fsdp.py @@ -110,7 +110,7 @@ transformer_auto_wrap_policy) DEFAULT_KWARGS = dict( - batch_size=128, + batch_size=96, test_set_batch_size=64, num_epochs=18, momentum=0.9, @@ -167,7 +167,7 @@ def train_imagenet(): img_dim = get_model_property('img_dim') if FLAGS.fake_data: use_small_fake_sample = FLAGS.use_small_fake_sample - train_dataset_len = 5000 if use_small_fake_sample else 1200000 # Roughly the size of Imagenet dataset. + train_dataset_len = 50000 if use_small_fake_sample else 1200000 # Roughly the size of Imagenet dataset. train_loader = xu.SampleGenerator( data=(torch.zeros(FLAGS.batch_size, 3, img_dim, img_dim), torch.zeros(FLAGS.batch_size, dtype=torch.int64)), From 53a3bedb483a532a73d9b247bd80c021e7284b9b Mon Sep 17 00:00:00 2001 From: iefgnoix Date: Thu, 5 Oct 2023 13:35:49 -0700 Subject: [PATCH 5/6] keep reducing batch size to 64 --- test/test_train_mp_imagenet_fsdp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_train_mp_imagenet_fsdp.py b/test/test_train_mp_imagenet_fsdp.py index aba3f5773e0..fdfdc8a698c 100644 --- a/test/test_train_mp_imagenet_fsdp.py +++ b/test/test_train_mp_imagenet_fsdp.py @@ -110,7 +110,7 @@ transformer_auto_wrap_policy) DEFAULT_KWARGS = dict( - batch_size=96, + batch_size=64, test_set_batch_size=64, num_epochs=18, momentum=0.9, From 80803f49ce7dd4709638afd7c7ba709b88671656 Mon Sep 17 00:00:00 2001 From: iefgnoix Date: Fri, 6 Oct 2023 15:15:26 -0700 Subject: [PATCH 6/6] remove comments --- .circleci/common.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/.circleci/common.sh b/.circleci/common.sh index 8983eb757a9..88c5fed8efc 100755 --- a/.circleci/common.sh +++ b/.circleci/common.sh @@ -151,9 +151,6 @@ function run_torch_xla_python_tests() { # GPU tests if [ -x "$(command -v nvidia-smi)" ]; then # These tests fail on GPU with 03/30 TF-pin update (https://github.com/pytorch/xla/pull/4840) - # PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 - # PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --auto_wrap_policy type_based --use_small_fake_sample --num_epochs=1 - # XLA_DISABLE_FUNCTIONALIZATION=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --auto_wrap_policy type_based --use_small_fake_sample --num_epochs=1 XLA_DISABLE_FUNCTIONALIZATION=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1