Update the fsdp_config section of the llama-2 QLoRA + FSDP example:
add fsdp_limit_all_gathers and fsdp_state_dict_type, set
fsdp_use_orig_params to false, and move fsdp_cpu_ram_efficient_loading
below fsdp_use_orig_params.

NOTE(review): line breaks and indentation were reconstructed from a copy
that had been collapsed onto one line; confirm the context-line whitespace
against the target file before applying.

diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index b5abd0e39c..30916ed45a 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -66,9 +66,11 @@ weight_decay: 0.0
 fsdp:
   - full_shard
 fsdp_config:
-  fsdp_cpu_ram_efficient_loading: true
+  fsdp_limit_all_gathers: true
   fsdp_sync_module_states: true
   fsdp_offload_params: true
-  fsdp_use_orig_params: true
+  fsdp_use_orig_params: false
+  fsdp_cpu_ram_efficient_loading: true
   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+  fsdp_state_dict_type: SHARDED_STATE_DICT
 special_tokens: