diff --git a/tests/pytorch/nightly/llama2-model.libsonnet b/tests/pytorch/nightly/llama2-model.libsonnet
index 5e26006d1..483ed5100 100644
--- a/tests/pytorch/nightly/llama2-model.libsonnet
+++ b/tests/pytorch/nightly/llama2-model.libsonnet
@@ -92,53 +92,6 @@ local utils = import 'templates/utils.libsonnet';
       |||,
     },
   },
-  local fsdp = self.fsdp,
-  fsdp:: common.PyTorchTpuVmMixin {
-    modelName+: '-train-fsdp',
-    tpuSettings+: {
-      tpuVmExtraSetup: |||
-        pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
-        sudo apt update -y
-        sudo apt-get update -y
-        pip install accelerate -U
-        sudo apt-get install libomp5 -y
-        pip3 install mkl mkl-include
-        pip3 install tf-nightly tb-nightly tbp-nightly
-        pip3 install numpy
-        sudo apt-get install numactl -y
-        sudo apt-get install libopenblas-dev -y
-        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly-cp310-cp310-linux_x86_64.whl
-        pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl
-        pip3 install torch_xla[tpuvm]
-
-        # install tokenizer model
-        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model
-
-        # git clone and build transformers ### llama/transformers/
-        git clone -b lsiyuan/fsdp-data-aug https://github.com/pytorch-tpu/transformers.git
-        cd transformers
-        sudo pip3 uninstall transformers
-        sudo pip3 install -e .
-        pip3 install datasets
-        pip3 install evaluate
-        pip3 install scikit-learn
-        pip3 install accelerate
-        pwd
-        ls
-
-        # 7B config
-        mkdir 7B
-        cd 7B/
-        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/configs/hf_llama/7B.json
-
-        # save llama2 training
-        echo -e 'python3 -u transformers/examples/pytorch/xla_spawn.py --num_cores 64 transformers/examples/pytorch/language-modeling/run_clm.py --num_train_epochs 2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --do_train --output_dir . --overwrite_output_dir --config_name transformers/7B/7B.json --cache_dir /tmp --tokenizer_name gpt2 --block_size 1024 --optim adafactor --adafactor true --save_strategy no --logging_strategy no' >> llama2training.sh
-        cat llama2training.sh
-        pwd
-        ls
-      |||,
-    },
-  },
   local spmd = self.spmd,
   spmd:: common.PyTorchTpuVmMixin {
     modelName+: '-train-spmd',
@@ -188,7 +141,7 @@ local utils = import 'templates/utils.libsonnet';
         # 7B config
         mkdir 7B
         cd 7B/
-        wget https://storage.mtls.cloud.google.com/hf-train-config/llama/2B.json
+        wget https://storage.googleapis.com/manfei_public_experimental/2B.json
 
         # save llama2 training
         echo -e 'python transformers/examples/pytorch/language-modeling/run_clm.py --tokenizer_name gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 32 --per_device_eval_batch_size 8 --num_train_epochs 1 --do_train --output_dir /tmp/output --overwrite_output_dir --config_name transformers/7B/2B.json --save_strategy no --logging_strategy no --remove_unused_columns no --spmd_fsdp_sharding --torch_dtype bfloat16 --dataloader_drop_last yes --spmd_grad_chkpt --report_to none' >> llama2training.sh
@@ -206,7 +159,6 @@ local utils = import 'templates/utils.libsonnet';
 
   configs: [
     llama2_inference + v4_8 + common.Functional + timeouts.Hours(3) + infer,
-    // llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + fsdp,
     llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + spmd,
   ],
 }
diff --git a/tests/pytorch/r2.1/llama2-model.libsonnet b/tests/pytorch/r2.1/llama2-model.libsonnet
index 64aa8a64c..f1a556ec3 100644
--- a/tests/pytorch/r2.1/llama2-model.libsonnet
+++ b/tests/pytorch/r2.1/llama2-model.libsonnet
@@ -22,7 +22,7 @@ local utils = import 'templates/utils.libsonnet';
   local llama2_inference = self.llama2_inference,
   llama2_inference:: common.PyTorchTest {
     local config = self,
-    modelName: 'l2-i',
+    modelName: 'llama2-i',
     paramsOverride:: {
       scriptPath: 'llama/7B/llama2inference.sh',
       trainCommand: [
@@ -35,9 +35,9 @@ local utils = import 'templates/utils.libsonnet';
   local llama2_training = self.llama2_training,
   llama2_training:: common.PyTorchTest {
     local config = self,
-    modelName: 'l2-t',
+    modelName: 'llama2-t',
     paramsOverride:: {
-      scriptPath: 'llama/transformers/7B/llama2training.sh',
+      scriptPath: 'transformers/7B/llama2training.sh',
       trainCommand: [
         'bash',
         self.scriptPath,
@@ -45,9 +45,9 @@ local utils = import 'templates/utils.libsonnet';
     },
     command: self.paramsOverride.trainCommand,
   },
-  local pjrt = self.pjrt,
-  pjrt:: common.PyTorchTpuVmMixin {
-    modelName+: '-n-i',
+  local infer = self.infer,
+  infer:: common.PyTorchTpuVmMixin {
+    modelName+: '-infer',
     tpuSettings+: {
       tpuVmExtraSetup: |||
         pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
@@ -58,11 +58,11 @@ local utils = import 'templates/utils.libsonnet';
         pip3 install numpy
         sudo apt-get install numactl -y
         sudo apt-get install libopenblas-dev -y
-        # TODO change back to torch2.1 once pytorch released torch2.1 from current used pre-release wheel
+        # TODO change back to torch2.1 once pytorch released torch2.1
         # pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly-cp310-cp310-linux_x86_64.whl
         # pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl
         # pip3 install torch_xla[tpuvm]
-        pip3 install --user --pre --no-deps torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+        pip3 install torch --index-url https://download.pytorch.org/whl/test/cpu
         pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly%2B20230825-cp310-cp310-linux_x86_64.whl
         pip install torch_xla[tpuvm]
 
@@ -96,18 +96,30 @@ local utils = import 'templates/utils.libsonnet';
       |||,
     },
   },
-  local hf = self.hf,
-  hf:: common.PyTorchTpuVmMixin {
-    modelName+: '-h-f',
+  local spmd = self.spmd,
+  spmd:: common.PyTorchTpuVmMixin {
+    modelName+: '-train-spmd',
     tpuSettings+: {
+      tpuVmExports+: |||
+        export XLA_USE_BF16=1
+        export XLA_IR_DEBUG=1
+        export XLA_HLO_DEBUG=1
+        export BATCH_SIZE=32
+        export NUM_EPOCH=5
+        export PROFILE_EPOCH=2
+        export PROFILE_STEP=0
+        export PROFILE_DURATION_MS=20000
+        export XLA_USE_SPMD=1
+        export PJRT_DEVICE=TPU
+        export TPU_MEGACORE=megacore_dense
+      |||,
       tpuVmExtraSetup: |||
         pip3 uninstall torch torch_xla torchvision libtpu-nightly -y
-        sudo apt update -y
+        # sudo apt update -y
         sudo apt-get update -y
-        pip install accelerate -U
+        # pip install accelerate -U
         sudo apt-get install libomp5 -y
         pip3 install mkl mkl-include
-        pip3 install tf-nightly tb-nightly tbp-nightly
         pip3 install numpy
         sudo apt-get install numactl -y
         sudo apt-get install libopenblas-dev -y
@@ -115,21 +127,15 @@ local utils = import 'templates/utils.libsonnet';
         # pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch-nightly-cp310-cp310-linux_x86_64.whl
         # pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl
         # pip3 install torch_xla[tpuvm]
-        pip3 install --user --pre --no-deps torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+        pip3 install torch --index-url https://download.pytorch.org/whl/test/cpu
         pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly%2B20230825-cp310-cp310-linux_x86_64.whl
         pip install torch_xla[tpuvm]
 
         # install tokenizer model
         wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/llama/spiece.model
 
-        # git clone and build llama
-        git clone --branch llama2-google-next-inference https://github.com/pytorch-tpu/llama.git
-        cd llama
-        pip3 install -r requirements.txt
-        pip3 install -e .
-
-        # git clone and build transformers ### llama/transformers/
-        git clone -b lsiyuan/fsdp-data-aug https://github.com/pytorch-tpu/transformers.git
+        # git clone and build transformers ### transformers/
+        git clone -b llama2-google-next-training https://github.com/pytorch-tpu/transformers.git
         cd transformers
         sudo pip3 uninstall transformers
         sudo pip3 install -e .
@@ -140,13 +146,14 @@ local utils = import 'templates/utils.libsonnet';
         pwd
         ls
 
-        # 7B config
+        # 2B config
         mkdir 7B
         cd 7B/
-        wget https://storage.googleapis.com/tpu-pytorch/lsiyuan-experiment/configs/hf_llama/7B.json
+        wget https://storage.googleapis.com/manfei_public_experimental/2B.json
+        cat 2B.json
 
         # save llama2 training
-        echo -e 'python3 -u llama/transformers/examples/pytorch/xla_spawn.py --num_cores 64 llama/transformers/examples/pytorch/language-modeling/run_clm.py --num_train_epochs 2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 8 --do_train --output_dir . --overwrite_output_dir --config_name llama/transformers/7B/7B.json --cache_dir /tmp --tokenizer_name gpt2 --block_size 1024 --optim adafactor --adafactor true --save_strategy no --logging_strategy no' >> llama2training.sh
+        echo -e 'python transformers/examples/pytorch/language-modeling/run_clm.py --tokenizer_name gpt2 --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 --per_device_train_batch_size 32 --per_device_eval_batch_size 8 --num_train_epochs 1 --do_train --output_dir /tmp/output --overwrite_output_dir --config_name transformers/7B/2B.json --save_strategy no --logging_strategy no --remove_unused_columns no --spmd_fsdp_sharding --torch_dtype bfloat16 --dataloader_drop_last yes --spmd_grad_chkpt --report_to none' >> llama2training.sh
         cat llama2training.sh
         pwd
         ls
@@ -160,7 +167,7 @@ local utils = import 'templates/utils.libsonnet';
   },
 
   configs: [
-    llama2_inference + v4_8 + common.Functional + timeouts.Hours(3) + pjrt,
-    llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + hf,
+    llama2_inference + v4_8 + common.Functional + timeouts.Hours(3) + infer,
+    llama2_training + v4_8 + common.Functional + timeouts.Hours(3) + spmd,
   ],
 }
diff --git a/tests/pytorch/r2.1/sd-model.libsonnet b/tests/pytorch/r2.1/sd-model.libsonnet
index e3f2096cb..d903548f4 100644
--- a/tests/pytorch/r2.1/sd-model.libsonnet
+++ b/tests/pytorch/r2.1/sd-model.libsonnet
@@ -63,11 +63,7 @@ local utils = import 'templates/utils.libsonnet';
 
         # taming-transformers and CLIP override existing torch and torchvision so we need to reinstall
         # TODO change back to torch2.1 once pytorch released torch2.1
-        # pip uninstall -y torch torchvision
-        # pip install --user \
-        #   https://storage.googleapis.com/pytorch-xla-releases/wheels/xrt/tpuvm/torch-nightly-cp310-cp310-linux_x86_64.whl \
-        #   'torch_xla[tpuvm] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/xrt/tpuvm/torch_xla-nightly-cp310-cp310-linux_x86_64.whl'
-        # pip3 install --user --pre --no-deps torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+        pip uninstall -y torch torchvision
         pip3 install --user --pre --no-deps torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/cpu
         pip3 install https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-nightly%2B20230825-cp310-cp310-linux_x86_64.whl
         pip install torch_xla[tpuvm]
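
For context on the `configs:` entries these hunks touch: each test case is the Jsonnet sum of a base test object and several mixins, where `+` layers objects left to right and `+:` fields extend the inherited value instead of replacing it (which is why `modelName+: '-train-spmd'` yields names like `llama2-t-train-spmd`). The sketch below illustrates only that composition pattern; the field names are simplified stand-ins, not the real `templates/` API.

```jsonnet
// Minimal sketch of the mixin composition used in configs: above.
// 'base', 'v4_8', and 'spmd' here are illustrative stand-ins.
local base = {
  modelName: 'llama2-t',
  command: ['bash', 'llama2training.sh'],
};
local v4_8 = { acceleratorName: 'v4-8' };
local spmd = {
  // '+:' extends the inherited field rather than overwriting it,
  // so modelName becomes 'llama2-t' + '-train-spmd'.
  modelName+: '-train-spmd',
  tpuSettings+: { tpuVmExports: 'export XLA_USE_SPMD=1\n' },
};

// Later mixins override or extend earlier ones, left to right.
local config = base + v4_8 + spmd;

config  // config.modelName == 'llama2-t-train-spmd'
```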