From 6ab25b44dd9910a7082fd285183ef9bd5fd2f3f2 Mon Sep 17 00:00:00 2001
From: Daniel King
Date: Thu, 14 Dec 2023 19:47:00 -0800
Subject: [PATCH] temp comment out

---
 llmfoundry/models/hf/hf_causal_lm.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llmfoundry/models/hf/hf_causal_lm.py b/llmfoundry/models/hf/hf_causal_lm.py
index b4847611b9..d1ed50b0cb 100644
--- a/llmfoundry/models/hf/hf_causal_lm.py
+++ b/llmfoundry/models/hf/hf_causal_lm.py
@@ -107,20 +107,20 @@ def __init__(self, om_model_config: Union[DictConfig,
             om_model_config.pretrained_model_name_or_path,
             trust_remote_code=trust_remote_code,
             use_auth_token=use_auth_token,
-            attn_implementation=requested_attention_implementation,
+            # attn_implementation=requested_attention_implementation,
         )

         # This is not ideal, however Hugging Face's _autoset_attn_implementation function
         # forces you to load the model in fp16/bf16 if you want to use flash attention. Rather than loading
         # the model and then casting it back to fp32, we are monkeypatching their check.
         # https://github.com/huggingface/transformers/issues/28052
-        def _autoset_attn_implementation_monkeypatch(
-                cls, config, *args, **kwargs):  # type: ignore
-            config._attn_implementation = requested_attention_implementation
-            return config
+        # def _autoset_attn_implementation_monkeypatch(
+        #         cls, config, *args, **kwargs):  # type: ignore
+        #     config._attn_implementation = requested_attention_implementation
+        #     return config

-        PreTrainedModel._autoset_attn_implementation = classmethod(
-            _autoset_attn_implementation_monkeypatch)
+        # PreTrainedModel._autoset_attn_implementation = classmethod(
+        #     _autoset_attn_implementation_monkeypatch)

         # set config overrides
         for k, v in om_model_config.get('config_overrides', {}).items():
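
For context, below is a minimal standalone sketch of the monkeypatch that this patch temporarily comments out, so it can be read outside the diff. It assumes the `transformers` package is installed; the value assigned to `requested_attention_implementation` here is illustrative only (in hf_causal_lm.py it is computed earlier in `__init__`).

    # Hedged sketch, not part of the patch: the pattern disabled by the hunk above.
    from transformers import PreTrainedModel

    requested_attention_implementation = 'flash_attention_2'  # illustrative stand-in

    def _autoset_attn_implementation_monkeypatch(cls, config, *args, **kwargs):  # type: ignore
        # Pin the requested attention implementation directly on the config, bypassing
        # Hugging Face's fp16/bf16 requirement for flash attention (transformers issue #28052).
        config._attn_implementation = requested_attention_implementation
        return config

    PreTrainedModel._autoset_attn_implementation = classmethod(
        _autoset_attn_implementation_monkeypatch)

With the patch applied, this override is skipped and transformers' default `_autoset_attn_implementation` behavior (including its dtype check) is left in place, consistent with the "temp comment out" subject of the commit.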