Fix ckpt convert bug (PaddlePaddle#9521)
* refine log

* refine

* refine

* refine
zhangbo9674 authored Nov 29, 2024
1 parent 2985f90 commit 020a25f
Showing 2 changed files with 2 additions and 3 deletions.
4 changes: 2 additions & 2 deletions paddlenlp/trainer/utils/ckpt_converter.py
@@ -270,7 +270,7 @@ def gen_metadata_and_prepare_source_state_dict(self):
malloc_size = 0
for opt_state_name, opt_state_value in optimizer_state_dict.items():
malloc_size += opt_state_value.numel() * opt_state_value.element_size()
- malloc_size = malloc_size.numpy() / 2**20
+ malloc_size = malloc_size / 2**20
logger.debug(f"{malloc_size} MB of GPU memory were allocated.")

# merge sharding
@@ -555,7 +555,7 @@ def load_state_dict_and_rename(self):
for k, v in state_dict.items():
memory_size += v.numel() * v.element_size()

- memory_size = memory_size.numpy() / 2**20
+ memory_size = memory_size / 2**20
logger.debug(
f"The current rank has finished loading the checkpoint file and has allocated {memory_size} MB of GPU memory."
)
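Both hunks in ckpt_converter.py fix the same issue: the accumulated byte count is a plain Python number rather than a Tensor, so calling .numpy() on it fails. A minimal sketch of the fixed pattern, using a hypothetical state dict and an int() cast that is not in the original code (added here only because numel() may return either a plain int or a 0-D Tensor depending on the Paddle version):

import paddle

# Hypothetical stand-in for optimizer_state_dict in ckpt_converter.py.
state_dict = {"moment1": paddle.zeros([1024, 1024], dtype="float32")}

malloc_size = 0
for opt_state_name, opt_state_value in state_dict.items():
    # int() keeps the running total a plain Python int regardless of
    # whether numel() returns an int or a 0-D Tensor in this Paddle version.
    malloc_size += int(opt_state_value.numel()) * opt_state_value.element_size()

malloc_size = malloc_size / 2**20  # bytes -> MB; no .numpy() call on a plain int
print(f"{malloc_size} MB of GPU memory were allocated.")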
1 change: 0 additions & 1 deletion scripts/distribute/ci_case_auto.sh
@@ -95,7 +95,6 @@ function llama_case_list_auto() {
llama_dy2st_auto_bs4_bf16_DP1-MP1-PP4-SD2
llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1
llama_pir_auto_fuse_ffn_attention_qkv_MP2
- llama_convert_hybrid_ckpt_to_auto_parallel_bs2_fp32_DP2-MP1-PP1
llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP1-SP
llama_align_dygraph_dy2st_pir_auto_bs2_bf16_DP2-MP2-PP2-SP
llama_align_dygraph_dy2st_pir_auto_grad_merge_bs2_fp32_DP1-MP1-PP1
