The model was loaded with use_flash_attention_2=True, which is deprecated and may be removed in a future release. Please use attn_implementation="flash_attention_2" instead.

Traceback (most recent call last):
  File "/root/paddlejob/workspace/env_run/benchmark/generation/main.py", line 116, in <module>
    main()
  File "/root/paddlejob/workspace/env_run/benchmark/generation/main.py", line 91, in main
    pipeline = load_model_and_tokenizer(model_home, args.model, args.use_pipeline)
  File "/root/paddlejob/workspace/env_run/benchmark/generation/load_models_and_datasets.py", line 26, in load_model_and_tokenizer
    model = AutoModelForCausalLM.from_pretrained(
  File "/root/paddlejob/workspace/env_run/lib/python3.9/site-packages/transformers/models/auto/auto_factory.py", line 561, in from_pretrained
    return model_class.from_pretrained(
  File "/root/paddlejob/workspace/env_run/lib/python3.9/site-packages/transformers/modeling_utils.py", line 3456, in from_pretrained
    config = cls._autoset_attn_implementation(
  File "/root/paddlejob/workspace/env_run/lib/python3.9/site-packages/transformers/modeling_utils.py", line 1302, in _autoset_attn_implementation
    cls._check_and_enable_flash_attn_2(
  File "/root/paddlejob/workspace/env_run/lib/python3.9/site-packages/transformers/modeling_utils.py", line 1382, in _check_and_enable_flash_attn_2
    raise ValueError(
ValueError: LlamaForCausalLM does not support Flash Attention 2.0 yet. Please open an issue on GitHub to request support for this architecture: https://github.com/huggingface/transformers/issues/new
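For reference, a minimal sketch of the non-deprecated loading call the warning points to, assuming an installed transformers release that already ships Flash Attention 2 support for LlamaForCausalLM (roughly 4.34+) and a working flash-attn installation; the model id and dtype below are placeholders, not the values used in load_models_and_datasets.py:

```python
# Hypothetical example: load a Llama checkpoint with the attn_implementation
# argument instead of the deprecated use_flash_attention_2=True flag.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "meta-llama/Llama-2-7b-hf"  # placeholder model id

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,               # Flash Attention 2 requires fp16/bf16
    attn_implementation="flash_attention_2",  # replaces use_flash_attention_2=True
    device_map="auto",
)
```

If the same ValueError still appears with this call, it usually means the version check is failing for another reason (for example an older or patched transformers install, or a custom remote modeling class), rather than a problem with the argument itself.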