Skip to content

Commit

Permalink
Add fsdp tests back to GPU CI (#5674)
Browse files Browse the repository at this point in the history
* use manfei's change

* use the new flag

* reduce data size

* reduce batch size

* keep reducing batch size to 64

* remove comments
  • Loading branch information
vanbasten23 authored and qihqi committed Oct 10, 2023
1 parent 2db113e commit 934adc9
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
6 changes: 3 additions & 3 deletions .circleci/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,9 @@ function run_torch_xla_python_tests() {
# GPU tests
if [ -x "$(command -v nvidia-smi)" ]; then
# These tests fail on GPU with 03/30 TF-pin update (https://github.com/pytorch/xla/pull/4840)
- # PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1
- # PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --auto_wrap_policy type_based --use_small_fake_sample --num_epochs=1
- # XLA_DISABLE_FUNCTIONALIZATION=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1
+ PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1
+ PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --auto_wrap_policy type_based --use_small_fake_sample --num_epochs=1
+ XLA_DISABLE_FUNCTIONALIZATION=1 PJRT_DEVICE=GPU python test/test_train_mp_imagenet_fsdp.py --fake_data --use_nested_fsdp --use_small_fake_sample --num_epochs=1
# Syncfree SGD optimizer tests
if [ -d ./torch_xla/amp/syncfree ]; then
echo "Running Syncfree Optimizer Test"
Expand Down
2 changes: 1 addition & 1 deletion test/test_train_mp_imagenet_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@
transformer_auto_wrap_policy)

DEFAULT_KWARGS = dict(
- batch_size=128,
+ batch_size=64,
test_set_batch_size=64,
num_epochs=18,
momentum=0.9,
Expand Down

0 comments on commit 934adc9

Please sign in to comment.