From 4b7a262d3eec51e8b5511371b1a4346d58c43e1e Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Thu, 28 Dec 2023 17:58:52 -0800
Subject: [PATCH 1/3] bump transformers

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index c030fe3268..09c13455b8 100644
--- a/setup.py
+++ b/setup.py
@@ -49,7 +49,7 @@
 install_requires = [
     'mosaicml[libcloud,wandb,mlflow,oci,gcs]>=0.17.1,<0.18',
     'accelerate>=0.25,<0.26',  # for HF inference `device_map`
-    'transformers>=4.36,<4.37',
+    'transformers>=4.36.2,<4.37',
     'mosaicml-streaming>=0.7.1,<0.8',
     'torch>=2.1,<2.1.1',
     'datasets==2.15.0',

From eb4a596ef79d91837249d3fe9797558e618a42b4 Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Fri, 29 Dec 2023 00:28:34 -0800
Subject: [PATCH 2/3] try assign

---
 llmfoundry/callbacks/hf_checkpointer.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py
index 491d510188..83a729c627 100644
--- a/llmfoundry/callbacks/hf_checkpointer.py
+++ b/llmfoundry/callbacks/hf_checkpointer.py
@@ -24,6 +24,7 @@
 from llmfoundry.models.mpt import MPTConfig, MPTForCausalLM
 from llmfoundry.utils.huggingface_hub_utils import \
     edit_files_for_hf_compatibility
+from llmfoundry.models.utils import init_empty_weights
 
 log = logging.getLogger(__name__)
 
@@ -224,9 +225,10 @@ def _save_checkpoint(self, state: State, logger: Logger):
         # TODO: after torch 2.1, we can load a state dict into a meta model
         # and skip the extra model init
         log.debug(f'Creating new model instance')
-        new_model_instance = type(original_model)(copied_config)
-        new_model_instance.to(dtype=self.dtype)
-        new_model_instance.load_state_dict(state_dict)
+        with init_empty_weights(include_buffers=False):
+            new_model_instance = type(original_model)(copied_config)
+        # new_model_instance.to(dtype=self.dtype)
+        new_model_instance.load_state_dict(state_dict, assign=True)
         del state_dict
 
         log.debug('Saving Hugging Face checkpoint to disk')

From 331212744268dcc0f9504b1bca7b70320de56e61 Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Fri, 19 Jan 2024 16:58:16 -0800
Subject: [PATCH 3/3] revert assign

---
 llmfoundry/callbacks/hf_checkpointer.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llmfoundry/callbacks/hf_checkpointer.py b/llmfoundry/callbacks/hf_checkpointer.py
index 83a729c627..491d510188 100644
--- a/llmfoundry/callbacks/hf_checkpointer.py
+++ b/llmfoundry/callbacks/hf_checkpointer.py
@@ -24,7 +24,6 @@
 from llmfoundry.models.mpt import MPTConfig, MPTForCausalLM
 from llmfoundry.utils.huggingface_hub_utils import \
     edit_files_for_hf_compatibility
-from llmfoundry.models.utils import init_empty_weights
 
 log = logging.getLogger(__name__)
 
@@ -225,10 +224,9 @@ def _save_checkpoint(self, state: State, logger: Logger):
         # TODO: after torch 2.1, we can load a state dict into a meta model
         # and skip the extra model init
         log.debug(f'Creating new model instance')
-        with init_empty_weights(include_buffers=False):
-            new_model_instance = type(original_model)(copied_config)
-        # new_model_instance.to(dtype=self.dtype)
-        new_model_instance.load_state_dict(state_dict, assign=True)
+        new_model_instance = type(original_model)(copied_config)
+        new_model_instance.to(dtype=self.dtype)
+        new_model_instance.load_state_dict(state_dict)
         del state_dict
 
         log.debug('Saving Hugging Face checkpoint to disk')
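
For context on the technique PATCH 2/3 attempts before PATCH 3/3 reverts it:
the idea is to build the model on the meta device, so no real weight memory
is allocated, and then materialize it directly from the checkpoint with
load_state_dict(..., assign=True), which torch added in 2.1. Below is a
minimal, self-contained sketch of that pattern, not the llmfoundry code: it
assumes accelerate's init_empty_weights rather than the llmfoundry wrapper,
and the gpt2 config and AutoModelForCausalLM are stand-ins for the
copied_config and model class used in the callback.

from accelerate import init_empty_weights
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained('gpt2')  # stand-in for copied_config

# Stand-in for the trained weights pulled out of the Composer state.
reference = AutoModelForCausalLM.from_config(config)
state_dict = reference.state_dict()

# Parameters land on the 'meta' device, so no real memory is allocated.
# include_buffers=False keeps buffers as real tensors, since non-persistent
# buffers never appear in the state dict and could not be restored from it.
with init_empty_weights(include_buffers=False):
    new_model = AutoModelForCausalLM.from_config(config)

# assign=True (torch >= 2.1) swaps the checkpoint tensors into the module
# instead of copying them into the (meta) parameters, so the model is
# materialized straight from the state dict with no extra copy.
new_model.load_state_dict(state_dict, assign=True)

One consequence of assign=True, and plausibly part of why PATCH 3/3 reverts
it, is that the new model simply adopts the dtypes of the checkpoint
tensors, so the explicit new_model_instance.to(dtype=self.dtype) cast (which
PATCH 2/3 comments out) no longer has a natural place to happen.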