diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py
index 26d4e6e..9fa6c6a 100644
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -189,8 +189,8 @@ def get_extended_attention_mask(self, attention_mask: Tensor, input_shape: Tuple
         # positions we want to attend and -10000.0 for masked positions.
         # Since we are adding it to the raw scores before the softmax, this is
         # effectively the same as removing these entirely.
-        exteneded_attention_mask = extended_attention_mask.to(dtype=self.dtype)
-        exteneded_attention_mask = (1.0 - extended_attention_mask) * -10000.0
+        extended_attention_mask = extended_attention_mask.to(dtype=self.dtype)
+        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
         return extended_attention_mask

     def get_head_mask(
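
For context, here is a minimal, self-contained sketch (not part of the patch; the example mask and the hard-coded torch.float32 standing in for self.dtype are illustrative assumptions) of what the corrected assignments compute. A 1/0 padding mask is turned into an additive bias of 0.0 / -10000.0 that is added to the raw attention scores before the softmax, which effectively masks out the padded positions:

import torch

# 1 marks tokens to attend to, 0 marks padding.
attention_mask = torch.tensor([[1, 1, 1, 0, 0]])

# Broadcast to the [batch, heads, query_len, key_len] shape used for attention scores.
extended_attention_mask = attention_mask[:, None, None, :]

# The corrected lines from the patch, with torch.float32 in place of self.dtype.
extended_attention_mask = extended_attention_mask.to(dtype=torch.float32)
extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

print(extended_attention_mask)
# attended positions -> 0.0 (printed as -0.), padded positions -> -10000.0

Before this fix, the transformed mask was assigned to the misspelled name exteneded_attention_mask, so the returned extended_attention_mask was still the raw 1/0 tensor rather than the additive bias.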