Here is my env. The version of transformers meets the requirements in monkeypatch.py. The traceback is as follows:
>> python pred_snap.py --model llama2-7b-chat-4k --compress_args_path ablation_c1024_w32_k7_maxpool.json
Traceback (most recent call last):
File "experiments/LongBench/pred_snap.py", line 321, in
File "/data1/ss/anaconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "experiments/LongBench/pred_snap.py", line 132, in get_pred_single_gpu
File "/data1/ss/anaconda3/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/data1/ss/anaconda3/lib/python3.10/site-packages/transformers/generation/utils.py", line 1474, in generate
return self.greedy_search(
File "/data1/ss/anaconda3/lib/python3.10/site-packages/transformers/generation/utils.py", line 2335, in greedy_search
outputs = self(
File "/data1/ss/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data1/ss/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/data1/ss/anaconda3/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1183, in forward
outputs = self.model(
File "/data1/ss/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/data1/ss/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/data1/ss/anaconda3/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1035, in forward
attention_mask = _prepare_4d_causal_attention_mask_for_sdpa(
File "/data1/ss/anaconda3/lib/python3.10/site-packages/transformers/modeling_attn_mask_utils.py", line 398, in _prepare_4d_causal_attention_mask_for_sdpa
expanded_4d_mask = attn_mask_converter.to_4d(
File "/data1/ss/anaconda3/lib/python3.10/site-packages/transformers/modeling_attn_mask_utils.py", line 137, in to_4d
expanded_attn_mask = causal_4d_mask.masked_fill(expanded_attn_mask.bool(), torch.finfo(dtype).min)
RuntimeError: The size of tensor a (3509) must match the size of tensor b (7017) at non-singleton dimension 3
I think the reason is that DynamicCache.get_usable_length conflicts with the causal-mask helper _prepare_4d_causal_attention_mask_for_sdpa. I would like to know how I can quickly fix this. Thanks :)
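To make the suspected conflict concrete, here is a minimal sketch that reproduces the broadcast error with the two lengths from the traceback (3509 and 7017). Which length comes from DynamicCache.get_usable_length versus the expanded attention_mask is my assumption; the tensor shapes are illustrative only, not the real values flowing through SnapKV or transformers:

```python
# Minimal sketch of the suspected length mismatch. The two lengths (3509 and
# 7017) are taken from the traceback above; everything else is an
# illustrative assumption, not SnapKV or transformers internals.
import torch

cache_len = 3509   # key length the causal 4D mask was built with (hypothetical source)
mask_len = 7017    # key length of the expanded 2D attention_mask (hypothetical source)

causal_4d_mask = torch.zeros(1, 1, 1, cache_len)            # [bsz, 1, q_len, kv_len]
expanded_attn_mask = torch.ones(1, 1, 1, mask_len).bool()   # [bsz, 1, q_len, kv_len]

try:
    # Same masked_fill as in the traceback (modeling_attn_mask_utils.py, line 137)
    causal_4d_mask.masked_fill(expanded_attn_mask, torch.finfo(torch.float32).min)
except RuntimeError as e:
    print(e)  # The size of tensor a (3509) must match the size of tensor b (7017) ...
```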